load("../data/Frecuencia_De_Accidentes_Semanal.Rda")
load("../data/Dias_Especiales_Semanal.Rda")
Se crean las columnas de accidentes Graves y leves para saber la frecuencia por día
library(reshape)
## Warning: package 'reshape' was built under R version 3.5.3
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
Total_Dataset_Freq_S <- cast(Total_Dataset_Freq_S[,c(1,3,4,5)],ANO+SEMANA~GRAVEDAD)
## Using FREQ as value column. Use the value argument to cast to override this choice
Se agrega la columna TOTAL_ACCIDENTES
Total_Dataset_Freq_S$TOTAL_ACCIDENTES <- Total_Dataset_Freq_S$ACCIDENTES_GRAVES + Total_Dataset_Freq_S$ACCIDENTES_LEVES
Total_Dataset_Freq_S <- sqldf("SELECT*
FROM Total_Dataset_Freq_S FS
LEFT JOIN Dias_Especiales_Semanal DES
ON (FS.ANO=DES.ANO AND FS.SEMANA=DES.SEMANA)")
library(dplyr)
Total_Dataset_Freq_S<-unite_(Total_Dataset_Freq_S, "Ano_Sem", c("ANO..6","SEMANA..8"))
save(Total_Dataset_Freq_S,file="../Modelos/Total_Dataset_Freq_S_semanal.Rda")
Se ajustarán modelos con la información disponible desde el 01 de enero de 2014 hasta el 31 de diciembre de 2017 y se utilizará el año 2018 para validar el modelo:
Train_S_Dataset <- subset(Total_Dataset_Freq_S, ANO!="2018")
summary(Train_S_Dataset$ANO)
## Length Class Mode
## 210 character character
Se ajustan otra vez los niveles del factor ANO
Train_S_Dataset$ANO <- factor(Train_S_Dataset$ANO)
summary(Train_S_Dataset$ANO)
## 2014 2015 2016 2017
## 52 53 53 52
library(sqldf)
Test_S_Dataset <- sqldf("SELECT *
FROM Total_Dataset_Freq_S
WHERE ANO == 2018")
summary(Test_S_Dataset$ANO)
## Length Class Mode
## 52 character character
Se ajustan otra vez los niveles del factor ANO
Test_S_Dataset$ANO <- factor(Test_S_Dataset$ANO)
summary(Test_S_Dataset$ANO)
## 2018
## 52
Se utilizará el método forward selection para elegir las mejores variables explicativas del modelo teniendo como criterio aquellas variables que presente mejor R^2 ajustado
library (leaps)
## Warning: package 'leaps' was built under R version 3.5.3
regfit.fwd=regsubsets (TOTAL_ACCIDENTES∼Ano_Base+Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros,Train_S_Dataset, method ="forward", nvmax= 80)
summary (regfit.fwd)
## Subset selection object
## Call: regsubsets.formula(TOTAL_ACCIDENTES ~ Ano_Base + Ano_Base + SEMANA +
## Feria_Flores_Semana + Semana_Santa_Semana + Feriados_Lunes +
## Feriados_Otros, Train_S_Dataset, method = "forward", nvmax = 80)
## 57 Variables (and intercept)
## Forced in Forced out
## Ano_Base FALSE FALSE
## SEMANA02 FALSE FALSE
## SEMANA03 FALSE FALSE
## SEMANA04 FALSE FALSE
## SEMANA05 FALSE FALSE
## SEMANA06 FALSE FALSE
## SEMANA07 FALSE FALSE
## SEMANA08 FALSE FALSE
## SEMANA09 FALSE FALSE
## SEMANA10 FALSE FALSE
## SEMANA11 FALSE FALSE
## SEMANA12 FALSE FALSE
## SEMANA13 FALSE FALSE
## SEMANA14 FALSE FALSE
## SEMANA15 FALSE FALSE
## SEMANA16 FALSE FALSE
## SEMANA17 FALSE FALSE
## SEMANA18 FALSE FALSE
## SEMANA19 FALSE FALSE
## SEMANA20 FALSE FALSE
## SEMANA21 FALSE FALSE
## SEMANA22 FALSE FALSE
## SEMANA23 FALSE FALSE
## SEMANA24 FALSE FALSE
## SEMANA25 FALSE FALSE
## SEMANA26 FALSE FALSE
## SEMANA27 FALSE FALSE
## SEMANA28 FALSE FALSE
## SEMANA29 FALSE FALSE
## SEMANA30 FALSE FALSE
## SEMANA31 FALSE FALSE
## SEMANA32 FALSE FALSE
## SEMANA33 FALSE FALSE
## SEMANA34 FALSE FALSE
## SEMANA35 FALSE FALSE
## SEMANA36 FALSE FALSE
## SEMANA37 FALSE FALSE
## SEMANA38 FALSE FALSE
## SEMANA39 FALSE FALSE
## SEMANA40 FALSE FALSE
## SEMANA41 FALSE FALSE
## SEMANA42 FALSE FALSE
## SEMANA43 FALSE FALSE
## SEMANA44 FALSE FALSE
## SEMANA45 FALSE FALSE
## SEMANA46 FALSE FALSE
## SEMANA47 FALSE FALSE
## SEMANA48 FALSE FALSE
## SEMANA49 FALSE FALSE
## SEMANA50 FALSE FALSE
## SEMANA51 FALSE FALSE
## SEMANA52 FALSE FALSE
## SEMANA53 FALSE FALSE
## Feria_Flores_Semana FALSE FALSE
## Semana_Santa_Semana FALSE FALSE
## Feriados_Lunes FALSE FALSE
## Feriados_Otros FALSE FALSE
## 1 subsets of each size up to 57
## Selection Algorithm: forward
## Ano_Base SEMANA02 SEMANA03 SEMANA04 SEMANA05 SEMANA06 SEMANA07
## 1 ( 1 ) " " " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " " " " " " "
## 4 ( 1 ) " " "*" " " " " " " " " " "
## 5 ( 1 ) " " "*" " " " " " " " " " "
## 6 ( 1 ) " " "*" " " " " " " " " " "
## 7 ( 1 ) " " "*" " " " " " " " " " "
## 8 ( 1 ) " " "*" " " " " " " " " " "
## 9 ( 1 ) " " "*" "*" " " " " " " " "
## 10 ( 1 ) " " "*" "*" "*" " " " " " "
## 11 ( 1 ) "*" "*" "*" "*" " " " " " "
## 12 ( 1 ) "*" "*" "*" "*" " " " " " "
## 13 ( 1 ) "*" "*" "*" "*" " " " " " "
## 14 ( 1 ) "*" "*" "*" "*" " " " " " "
## 15 ( 1 ) "*" "*" "*" "*" " " " " " "
## 16 ( 1 ) "*" "*" "*" "*" " " " " " "
## 17 ( 1 ) "*" "*" "*" "*" " " " " " "
## 18 ( 1 ) "*" "*" "*" "*" " " " " " "
## 19 ( 1 ) "*" "*" "*" "*" " " " " " "
## 20 ( 1 ) "*" "*" "*" "*" " " " " " "
## 21 ( 1 ) "*" "*" "*" "*" " " " " " "
## 22 ( 1 ) "*" "*" "*" "*" " " " " " "
## 23 ( 1 ) "*" "*" "*" "*" " " " " " "
## 24 ( 1 ) "*" "*" "*" "*" " " " " " "
## 25 ( 1 ) "*" "*" "*" "*" " " " " " "
## 26 ( 1 ) "*" "*" "*" "*" " " " " " "
## 27 ( 1 ) "*" "*" "*" "*" " " " " " "
## 28 ( 1 ) "*" "*" "*" "*" " " " " " "
## 29 ( 1 ) "*" "*" "*" "*" " " " " " "
## 30 ( 1 ) "*" "*" "*" "*" " " " " " "
## 31 ( 1 ) "*" "*" "*" "*" " " " " "*"
## 32 ( 1 ) "*" "*" "*" "*" " " " " "*"
## 33 ( 1 ) "*" "*" "*" "*" " " " " "*"
## 34 ( 1 ) "*" "*" "*" "*" " " " " "*"
## 35 ( 1 ) "*" "*" "*" "*" " " " " "*"
## 36 ( 1 ) "*" "*" "*" "*" " " " " "*"
## 37 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 38 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 39 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 40 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 41 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 42 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 43 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 44 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 45 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 46 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 47 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 48 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 49 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 50 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 51 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 52 ( 1 ) "*" "*" "*" "*" " " "*" "*"
## 53 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 54 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 55 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 56 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 57 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## SEMANA08 SEMANA09 SEMANA10 SEMANA11 SEMANA12 SEMANA13 SEMANA14
## 1 ( 1 ) " " " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " " " " " " "
## 4 ( 1 ) " " " " " " " " " " " " " "
## 5 ( 1 ) " " " " " " " " " " " " " "
## 6 ( 1 ) " " " " " " " " " " " " " "
## 7 ( 1 ) " " " " " " " " " " " " " "
## 8 ( 1 ) " " " " " " " " " " " " " "
## 9 ( 1 ) " " " " " " " " " " " " " "
## 10 ( 1 ) " " " " " " " " " " " " " "
## 11 ( 1 ) " " " " " " " " " " " " " "
## 12 ( 1 ) " " " " " " " " " " " " " "
## 13 ( 1 ) " " " " " " " " " " " " " "
## 14 ( 1 ) " " " " " " " " " " " " " "
## 15 ( 1 ) " " " " " " " " " " " " " "
## 16 ( 1 ) " " " " " " " " " " " " " "
## 17 ( 1 ) " " " " " " " " " " " " " "
## 18 ( 1 ) " " " " " " " " " " " " " "
## 19 ( 1 ) " " " " " " " " " " " " " "
## 20 ( 1 ) " " " " "*" " " " " " " " "
## 21 ( 1 ) " " " " "*" " " " " " " " "
## 22 ( 1 ) " " " " "*" " " " " " " "*"
## 23 ( 1 ) " " " " "*" " " " " " " "*"
## 24 ( 1 ) " " " " "*" " " " " " " "*"
## 25 ( 1 ) " " " " "*" " " " " " " "*"
## 26 ( 1 ) " " " " "*" "*" " " " " "*"
## 27 ( 1 ) " " " " "*" "*" " " " " "*"
## 28 ( 1 ) " " " " "*" "*" " " " " "*"
## 29 ( 1 ) " " " " "*" "*" " " " " "*"
## 30 ( 1 ) " " " " "*" "*" " " " " "*"
## 31 ( 1 ) " " " " "*" "*" " " " " "*"
## 32 ( 1 ) " " " " "*" "*" " " " " "*"
## 33 ( 1 ) " " " " "*" "*" " " " " "*"
## 34 ( 1 ) " " " " "*" "*" " " " " "*"
## 35 ( 1 ) " " " " "*" "*" " " " " "*"
## 36 ( 1 ) " " " " "*" "*" " " " " "*"
## 37 ( 1 ) " " " " "*" "*" " " " " "*"
## 38 ( 1 ) " " " " "*" "*" " " " " "*"
## 39 ( 1 ) " " " " "*" "*" " " " " "*"
## 40 ( 1 ) " " " " "*" "*" " " " " "*"
## 41 ( 1 ) "*" " " "*" "*" " " " " "*"
## 42 ( 1 ) "*" " " "*" "*" " " " " "*"
## 43 ( 1 ) "*" " " "*" "*" " " " " "*"
## 44 ( 1 ) "*" "*" "*" "*" " " " " "*"
## 45 ( 1 ) "*" "*" "*" "*" " " " " "*"
## 46 ( 1 ) "*" "*" "*" "*" " " " " "*"
## 47 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 48 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 49 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 50 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 51 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 52 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 53 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 54 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 55 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 56 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 57 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## SEMANA15 SEMANA16 SEMANA17 SEMANA18 SEMANA19 SEMANA20 SEMANA21
## 1 ( 1 ) " " " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " " " " " " "
## 4 ( 1 ) " " " " " " " " " " " " " "
## 5 ( 1 ) " " " " " " " " " " " " " "
## 6 ( 1 ) " " " " " " " " " " " " " "
## 7 ( 1 ) " " " " " " " " " " " " " "
## 8 ( 1 ) " " " " " " " " " " " " " "
## 9 ( 1 ) " " " " " " " " " " " " " "
## 10 ( 1 ) " " " " " " " " " " " " " "
## 11 ( 1 ) " " " " " " " " " " " " " "
## 12 ( 1 ) " " " " " " " " " " " " " "
## 13 ( 1 ) " " " " " " " " " " " " " "
## 14 ( 1 ) " " " " " " "*" " " " " " "
## 15 ( 1 ) " " " " " " "*" " " " " " "
## 16 ( 1 ) " " " " "*" "*" " " " " " "
## 17 ( 1 ) " " " " "*" "*" " " " " " "
## 18 ( 1 ) " " " " "*" "*" " " " " " "
## 19 ( 1 ) " " " " "*" "*" " " " " " "
## 20 ( 1 ) " " " " "*" "*" " " " " " "
## 21 ( 1 ) " " " " "*" "*" " " " " " "
## 22 ( 1 ) " " " " "*" "*" " " " " " "
## 23 ( 1 ) " " " " "*" "*" " " " " " "
## 24 ( 1 ) " " " " "*" "*" " " " " " "
## 25 ( 1 ) "*" " " "*" "*" " " " " " "
## 26 ( 1 ) "*" " " "*" "*" " " " " " "
## 27 ( 1 ) "*" " " "*" "*" " " " " " "
## 28 ( 1 ) "*" " " "*" "*" " " " " " "
## 29 ( 1 ) "*" " " "*" "*" " " " " " "
## 30 ( 1 ) "*" " " "*" "*" " " "*" " "
## 31 ( 1 ) "*" " " "*" "*" " " "*" " "
## 32 ( 1 ) "*" " " "*" "*" " " "*" " "
## 33 ( 1 ) "*" " " "*" "*" "*" "*" " "
## 34 ( 1 ) "*" " " "*" "*" "*" "*" " "
## 35 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 36 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 37 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 38 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 39 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 40 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 41 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 42 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 43 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 44 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 45 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 46 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 47 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 48 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 49 ( 1 ) "*" " " "*" "*" "*" "*" "*"
## 50 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 51 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 52 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 53 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 54 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 55 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 56 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 57 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## SEMANA22 SEMANA23 SEMANA24 SEMANA25 SEMANA26 SEMANA27 SEMANA28
## 1 ( 1 ) " " " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " " " " " " "
## 4 ( 1 ) " " " " " " " " " " " " " "
## 5 ( 1 ) " " " " " " " " " " " " " "
## 6 ( 1 ) " " " " " " " " " " " " " "
## 7 ( 1 ) " " " " " " " " " " " " " "
## 8 ( 1 ) " " " " " " " " "*" " " " "
## 9 ( 1 ) " " " " " " " " "*" " " " "
## 10 ( 1 ) " " " " " " " " "*" " " " "
## 11 ( 1 ) " " " " " " " " "*" " " " "
## 12 ( 1 ) " " " " " " " " "*" " " " "
## 13 ( 1 ) " " " " " " " " "*" " " " "
## 14 ( 1 ) " " " " " " " " "*" " " " "
## 15 ( 1 ) " " " " " " " " "*" " " " "
## 16 ( 1 ) " " " " " " " " "*" " " " "
## 17 ( 1 ) " " " " " " " " "*" " " " "
## 18 ( 1 ) " " " " " " " " "*" " " " "
## 19 ( 1 ) " " " " " " " " "*" " " " "
## 20 ( 1 ) " " " " " " " " "*" " " " "
## 21 ( 1 ) " " " " " " " " "*" " " " "
## 22 ( 1 ) " " " " " " " " "*" " " " "
## 23 ( 1 ) " " " " " " " " "*" " " " "
## 24 ( 1 ) " " " " " " " " "*" " " " "
## 25 ( 1 ) " " " " " " " " "*" " " " "
## 26 ( 1 ) " " " " " " " " "*" " " " "
## 27 ( 1 ) " " " " " " " " "*" " " " "
## 28 ( 1 ) " " " " " " " " "*" " " " "
## 29 ( 1 ) " " "*" " " " " "*" " " " "
## 30 ( 1 ) " " "*" " " " " "*" " " " "
## 31 ( 1 ) " " "*" " " " " "*" " " " "
## 32 ( 1 ) " " "*" " " " " "*" " " " "
## 33 ( 1 ) " " "*" " " " " "*" " " " "
## 34 ( 1 ) " " "*" " " " " "*" " " " "
## 35 ( 1 ) " " "*" " " " " "*" " " " "
## 36 ( 1 ) " " "*" " " " " "*" " " " "
## 37 ( 1 ) " " "*" " " " " "*" " " " "
## 38 ( 1 ) " " "*" " " " " "*" " " " "
## 39 ( 1 ) " " "*" " " " " "*" " " " "
## 40 ( 1 ) " " "*" "*" " " "*" " " " "
## 41 ( 1 ) " " "*" "*" " " "*" " " " "
## 42 ( 1 ) " " "*" "*" " " "*" " " " "
## 43 ( 1 ) " " "*" "*" " " "*" " " "*"
## 44 ( 1 ) " " "*" "*" " " "*" " " "*"
## 45 ( 1 ) " " "*" "*" " " "*" " " "*"
## 46 ( 1 ) " " "*" "*" " " "*" " " "*"
## 47 ( 1 ) " " "*" "*" " " "*" " " "*"
## 48 ( 1 ) " " "*" "*" " " "*" " " "*"
## 49 ( 1 ) " " "*" "*" " " "*" " " "*"
## 50 ( 1 ) " " "*" "*" " " "*" " " "*"
## 51 ( 1 ) " " "*" "*" " " "*" " " "*"
## 52 ( 1 ) " " "*" "*" " " "*" " " "*"
## 53 ( 1 ) " " "*" "*" " " "*" " " "*"
## 54 ( 1 ) "*" "*" "*" " " "*" " " "*"
## 55 ( 1 ) "*" "*" "*" " " "*" "*" "*"
## 56 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 57 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## SEMANA29 SEMANA30 SEMANA31 SEMANA32 SEMANA33 SEMANA34 SEMANA35
## 1 ( 1 ) " " " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " " " " " " "
## 4 ( 1 ) " " " " " " " " " " " " " "
## 5 ( 1 ) " " " " " " " " " " " " " "
## 6 ( 1 ) " " " " " " " " " " " " " "
## 7 ( 1 ) " " " " " " " " " " " " " "
## 8 ( 1 ) " " " " " " " " " " " " " "
## 9 ( 1 ) " " " " " " " " " " " " " "
## 10 ( 1 ) " " " " " " " " " " " " " "
## 11 ( 1 ) " " " " " " " " " " " " " "
## 12 ( 1 ) " " " " " " " " " " " " " "
## 13 ( 1 ) " " " " " " " " " " " " " "
## 14 ( 1 ) " " " " " " " " " " " " " "
## 15 ( 1 ) "*" " " " " " " " " " " " "
## 16 ( 1 ) "*" " " " " " " " " " " " "
## 17 ( 1 ) "*" " " " " " " " " " " " "
## 18 ( 1 ) "*" " " " " " " " " " " " "
## 19 ( 1 ) "*" " " " " " " " " " " " "
## 20 ( 1 ) "*" " " " " " " " " " " " "
## 21 ( 1 ) "*" " " " " " " " " " " " "
## 22 ( 1 ) "*" " " " " " " " " " " " "
## 23 ( 1 ) "*" " " " " " " " " " " " "
## 24 ( 1 ) "*" " " " " " " "*" " " " "
## 25 ( 1 ) "*" " " " " " " "*" " " " "
## 26 ( 1 ) "*" " " " " " " "*" " " " "
## 27 ( 1 ) "*" "*" " " " " "*" " " " "
## 28 ( 1 ) "*" "*" " " " " "*" " " "*"
## 29 ( 1 ) "*" "*" " " " " "*" " " "*"
## 30 ( 1 ) "*" "*" " " " " "*" " " "*"
## 31 ( 1 ) "*" "*" " " " " "*" " " "*"
## 32 ( 1 ) "*" "*" "*" " " "*" " " "*"
## 33 ( 1 ) "*" "*" "*" " " "*" " " "*"
## 34 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 35 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 36 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 37 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 38 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 39 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 40 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 41 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 42 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 43 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 44 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 45 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 46 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 47 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 48 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 49 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 50 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 51 ( 1 ) "*" "*" "*" "*" "*" " " "*"
## 52 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 53 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 54 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 55 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 56 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 57 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## SEMANA36 SEMANA37 SEMANA38 SEMANA39 SEMANA40 SEMANA41 SEMANA42
## 1 ( 1 ) " " " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " " " " " " "
## 4 ( 1 ) " " " " " " " " " " " " " "
## 5 ( 1 ) " " " " " " " " " " " " " "
## 6 ( 1 ) " " " " " " " " " " " " " "
## 7 ( 1 ) " " " " " " " " " " " " " "
## 8 ( 1 ) " " " " " " " " " " " " " "
## 9 ( 1 ) " " " " " " " " " " " " " "
## 10 ( 1 ) " " " " " " " " " " " " " "
## 11 ( 1 ) " " " " " " " " " " " " " "
## 12 ( 1 ) " " " " " " " " " " "*" " "
## 13 ( 1 ) " " " " " " " " " " "*" " "
## 14 ( 1 ) " " " " " " " " " " "*" " "
## 15 ( 1 ) " " " " " " " " " " "*" " "
## 16 ( 1 ) " " " " " " " " " " "*" " "
## 17 ( 1 ) " " "*" " " " " " " "*" " "
## 18 ( 1 ) " " "*" "*" " " " " "*" " "
## 19 ( 1 ) " " "*" "*" " " "*" "*" " "
## 20 ( 1 ) " " "*" "*" " " "*" "*" " "
## 21 ( 1 ) " " "*" "*" " " "*" "*" " "
## 22 ( 1 ) " " "*" "*" " " "*" "*" " "
## 23 ( 1 ) " " "*" "*" " " "*" "*" " "
## 24 ( 1 ) " " "*" "*" " " "*" "*" " "
## 25 ( 1 ) " " "*" "*" " " "*" "*" " "
## 26 ( 1 ) " " "*" "*" " " "*" "*" " "
## 27 ( 1 ) " " "*" "*" " " "*" "*" " "
## 28 ( 1 ) " " "*" "*" " " "*" "*" " "
## 29 ( 1 ) " " "*" "*" " " "*" "*" " "
## 30 ( 1 ) " " "*" "*" " " "*" "*" " "
## 31 ( 1 ) " " "*" "*" " " "*" "*" " "
## 32 ( 1 ) " " "*" "*" " " "*" "*" " "
## 33 ( 1 ) " " "*" "*" " " "*" "*" " "
## 34 ( 1 ) " " "*" "*" " " "*" "*" " "
## 35 ( 1 ) " " "*" "*" " " "*" "*" " "
## 36 ( 1 ) " " "*" "*" " " "*" "*" " "
## 37 ( 1 ) " " "*" "*" " " "*" "*" " "
## 38 ( 1 ) "*" "*" "*" " " "*" "*" " "
## 39 ( 1 ) "*" "*" "*" " " "*" "*" " "
## 40 ( 1 ) "*" "*" "*" " " "*" "*" " "
## 41 ( 1 ) "*" "*" "*" " " "*" "*" " "
## 42 ( 1 ) "*" "*" "*" " " "*" "*" " "
## 43 ( 1 ) "*" "*" "*" " " "*" "*" " "
## 44 ( 1 ) "*" "*" "*" " " "*" "*" " "
## 45 ( 1 ) "*" "*" "*" " " "*" "*" " "
## 46 ( 1 ) "*" "*" "*" "*" "*" "*" " "
## 47 ( 1 ) "*" "*" "*" "*" "*" "*" " "
## 48 ( 1 ) "*" "*" "*" "*" "*" "*" " "
## 49 ( 1 ) "*" "*" "*" "*" "*" "*" " "
## 50 ( 1 ) "*" "*" "*" "*" "*" "*" " "
## 51 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 52 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 53 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 54 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 55 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 56 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## 57 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## SEMANA43 SEMANA44 SEMANA45 SEMANA46 SEMANA47 SEMANA48 SEMANA49
## 1 ( 1 ) " " " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " " " " " " "
## 4 ( 1 ) " " " " " " " " " " " " " "
## 5 ( 1 ) " " " " " " " " " " " " " "
## 6 ( 1 ) " " " " " " " " " " " " " "
## 7 ( 1 ) " " " " " " " " " " " " " "
## 8 ( 1 ) " " " " " " " " " " " " " "
## 9 ( 1 ) " " " " " " " " " " " " " "
## 10 ( 1 ) " " " " " " " " " " " " " "
## 11 ( 1 ) " " " " " " " " " " " " " "
## 12 ( 1 ) " " " " " " " " " " " " " "
## 13 ( 1 ) " " " " " " " " " " " " " "
## 14 ( 1 ) " " " " " " " " " " " " " "
## 15 ( 1 ) " " " " " " " " " " " " " "
## 16 ( 1 ) " " " " " " " " " " " " " "
## 17 ( 1 ) " " " " " " " " " " " " " "
## 18 ( 1 ) " " " " " " " " " " " " " "
## 19 ( 1 ) " " " " " " " " " " " " " "
## 20 ( 1 ) " " " " " " " " " " " " " "
## 21 ( 1 ) " " " " " " " " " " " " "*"
## 22 ( 1 ) " " " " " " " " " " " " "*"
## 23 ( 1 ) " " " " " " " " " " " " "*"
## 24 ( 1 ) " " " " " " " " " " " " "*"
## 25 ( 1 ) " " " " " " " " " " " " "*"
## 26 ( 1 ) " " " " " " " " " " " " "*"
## 27 ( 1 ) " " " " " " " " " " " " "*"
## 28 ( 1 ) " " " " " " " " " " " " "*"
## 29 ( 1 ) " " " " " " " " " " " " "*"
## 30 ( 1 ) " " " " " " " " " " " " "*"
## 31 ( 1 ) " " " " " " " " " " " " "*"
## 32 ( 1 ) " " " " " " " " " " " " "*"
## 33 ( 1 ) " " " " " " " " " " " " "*"
## 34 ( 1 ) " " " " " " " " " " " " "*"
## 35 ( 1 ) " " " " " " " " " " " " "*"
## 36 ( 1 ) "*" " " " " " " " " " " "*"
## 37 ( 1 ) "*" " " " " " " " " " " "*"
## 38 ( 1 ) "*" " " " " " " " " " " "*"
## 39 ( 1 ) "*" " " " " "*" " " " " "*"
## 40 ( 1 ) "*" " " " " "*" " " " " "*"
## 41 ( 1 ) "*" " " " " "*" " " " " "*"
## 42 ( 1 ) "*" "*" " " "*" " " " " "*"
## 43 ( 1 ) "*" "*" " " "*" " " " " "*"
## 44 ( 1 ) "*" "*" " " "*" " " " " "*"
## 45 ( 1 ) "*" "*" " " "*" " " "*" "*"
## 46 ( 1 ) "*" "*" " " "*" " " "*" "*"
## 47 ( 1 ) "*" "*" " " "*" " " "*" "*"
## 48 ( 1 ) "*" "*" " " "*" " " "*" "*"
## 49 ( 1 ) "*" "*" " " "*" "*" "*" "*"
## 50 ( 1 ) "*" "*" " " "*" "*" "*" "*"
## 51 ( 1 ) "*" "*" " " "*" "*" "*" "*"
## 52 ( 1 ) "*" "*" " " "*" "*" "*" "*"
## 53 ( 1 ) "*" "*" " " "*" "*" "*" "*"
## 54 ( 1 ) "*" "*" " " "*" "*" "*" "*"
## 55 ( 1 ) "*" "*" " " "*" "*" "*" "*"
## 56 ( 1 ) "*" "*" " " "*" "*" "*" "*"
## 57 ( 1 ) "*" "*" "*" "*" "*" "*" "*"
## SEMANA50 SEMANA51 SEMANA52 SEMANA53 Feria_Flores_Semana
## 1 ( 1 ) " " " " " " "*" " "
## 2 ( 1 ) " " " " " " "*" " "
## 3 ( 1 ) " " " " " " "*" " "
## 4 ( 1 ) " " " " " " "*" " "
## 5 ( 1 ) " " " " "*" "*" " "
## 6 ( 1 ) " " " " "*" "*" "*"
## 7 ( 1 ) " " " " "*" "*" "*"
## 8 ( 1 ) " " " " "*" "*" "*"
## 9 ( 1 ) " " " " "*" "*" "*"
## 10 ( 1 ) " " " " "*" "*" "*"
## 11 ( 1 ) " " " " "*" "*" "*"
## 12 ( 1 ) " " " " "*" "*" "*"
## 13 ( 1 ) " " "*" "*" "*" "*"
## 14 ( 1 ) " " "*" "*" "*" "*"
## 15 ( 1 ) " " "*" "*" "*" "*"
## 16 ( 1 ) " " "*" "*" "*" "*"
## 17 ( 1 ) " " "*" "*" "*" "*"
## 18 ( 1 ) " " "*" "*" "*" "*"
## 19 ( 1 ) " " "*" "*" "*" "*"
## 20 ( 1 ) " " "*" "*" "*" "*"
## 21 ( 1 ) " " "*" "*" "*" "*"
## 22 ( 1 ) " " "*" "*" "*" "*"
## 23 ( 1 ) "*" "*" "*" "*" "*"
## 24 ( 1 ) "*" "*" "*" "*" "*"
## 25 ( 1 ) "*" "*" "*" "*" "*"
## 26 ( 1 ) "*" "*" "*" "*" "*"
## 27 ( 1 ) "*" "*" "*" "*" "*"
## 28 ( 1 ) "*" "*" "*" "*" "*"
## 29 ( 1 ) "*" "*" "*" "*" "*"
## 30 ( 1 ) "*" "*" "*" "*" "*"
## 31 ( 1 ) "*" "*" "*" "*" "*"
## 32 ( 1 ) "*" "*" "*" "*" "*"
## 33 ( 1 ) "*" "*" "*" "*" "*"
## 34 ( 1 ) "*" "*" "*" "*" "*"
## 35 ( 1 ) "*" "*" "*" "*" "*"
## 36 ( 1 ) "*" "*" "*" "*" "*"
## 37 ( 1 ) "*" "*" "*" "*" "*"
## 38 ( 1 ) "*" "*" "*" "*" "*"
## 39 ( 1 ) "*" "*" "*" "*" "*"
## 40 ( 1 ) "*" "*" "*" "*" "*"
## 41 ( 1 ) "*" "*" "*" "*" "*"
## 42 ( 1 ) "*" "*" "*" "*" "*"
## 43 ( 1 ) "*" "*" "*" "*" "*"
## 44 ( 1 ) "*" "*" "*" "*" "*"
## 45 ( 1 ) "*" "*" "*" "*" "*"
## 46 ( 1 ) "*" "*" "*" "*" "*"
## 47 ( 1 ) "*" "*" "*" "*" "*"
## 48 ( 1 ) "*" "*" "*" "*" "*"
## 49 ( 1 ) "*" "*" "*" "*" "*"
## 50 ( 1 ) "*" "*" "*" "*" "*"
## 51 ( 1 ) "*" "*" "*" "*" "*"
## 52 ( 1 ) "*" "*" "*" "*" "*"
## 53 ( 1 ) "*" "*" "*" "*" "*"
## 54 ( 1 ) "*" "*" "*" "*" "*"
## 55 ( 1 ) "*" "*" "*" "*" "*"
## 56 ( 1 ) "*" "*" "*" "*" "*"
## 57 ( 1 ) "*" "*" "*" "*" "*"
## Semana_Santa_Semana Feriados_Lunes Feriados_Otros
## 1 ( 1 ) " " " " " "
## 2 ( 1 ) "*" " " " "
## 3 ( 1 ) "*" "*" " "
## 4 ( 1 ) "*" "*" " "
## 5 ( 1 ) "*" "*" " "
## 6 ( 1 ) "*" "*" " "
## 7 ( 1 ) "*" "*" "*"
## 8 ( 1 ) "*" "*" "*"
## 9 ( 1 ) "*" "*" "*"
## 10 ( 1 ) "*" "*" "*"
## 11 ( 1 ) "*" "*" "*"
## 12 ( 1 ) "*" "*" "*"
## 13 ( 1 ) "*" "*" "*"
## 14 ( 1 ) "*" "*" "*"
## 15 ( 1 ) "*" "*" "*"
## 16 ( 1 ) "*" "*" "*"
## 17 ( 1 ) "*" "*" "*"
## 18 ( 1 ) "*" "*" "*"
## 19 ( 1 ) "*" "*" "*"
## 20 ( 1 ) "*" "*" "*"
## 21 ( 1 ) "*" "*" "*"
## 22 ( 1 ) "*" "*" "*"
## 23 ( 1 ) "*" "*" "*"
## 24 ( 1 ) "*" "*" "*"
## 25 ( 1 ) "*" "*" "*"
## 26 ( 1 ) "*" "*" "*"
## 27 ( 1 ) "*" "*" "*"
## 28 ( 1 ) "*" "*" "*"
## 29 ( 1 ) "*" "*" "*"
## 30 ( 1 ) "*" "*" "*"
## 31 ( 1 ) "*" "*" "*"
## 32 ( 1 ) "*" "*" "*"
## 33 ( 1 ) "*" "*" "*"
## 34 ( 1 ) "*" "*" "*"
## 35 ( 1 ) "*" "*" "*"
## 36 ( 1 ) "*" "*" "*"
## 37 ( 1 ) "*" "*" "*"
## 38 ( 1 ) "*" "*" "*"
## 39 ( 1 ) "*" "*" "*"
## 40 ( 1 ) "*" "*" "*"
## 41 ( 1 ) "*" "*" "*"
## 42 ( 1 ) "*" "*" "*"
## 43 ( 1 ) "*" "*" "*"
## 44 ( 1 ) "*" "*" "*"
## 45 ( 1 ) "*" "*" "*"
## 46 ( 1 ) "*" "*" "*"
## 47 ( 1 ) "*" "*" "*"
## 48 ( 1 ) "*" "*" "*"
## 49 ( 1 ) "*" "*" "*"
## 50 ( 1 ) "*" "*" "*"
## 51 ( 1 ) "*" "*" "*"
## 52 ( 1 ) "*" "*" "*"
## 53 ( 1 ) "*" "*" "*"
## 54 ( 1 ) "*" "*" "*"
## 55 ( 1 ) "*" "*" "*"
## 56 ( 1 ) "*" "*" "*"
## 57 ( 1 ) "*" "*" "*"
reg.summary =summary(regfit.fwd)
names(reg.summary)
## [1] "which" "rsq" "rss" "adjr2" "cp" "bic" "outmat" "obj"
reg.summary$rsq
## [1] 0.2564840 0.3589831 0.4342450 0.4836158 0.5143629 0.5330118 0.5619310
## [8] 0.5750247 0.5839300 0.5927802 0.5997164 0.6064533 0.6118691 0.6161374
## [15] 0.6209274 0.6259955 0.6303852 0.6350712 0.6391964 0.6434377 0.6466886
## [22] 0.6493843 0.6520138 0.6545914 0.6569814 0.6592532 0.6615622 0.6640333
## [29] 0.6666124 0.6690087 0.6714405 0.6740170 0.6766598 0.6791777 0.6810979
## [36] 0.6824556 0.6838543 0.6854185 0.6869902 0.6886558 0.6903860 0.6921423
## [43] 0.6940457 0.6961530 0.6985117 0.7012379 0.7042655 0.7077893 0.7113063
## [50] 0.7149150 0.7190467 0.7235920 0.7297406 0.7381385 0.7470696 0.7633664
## [57] 0.8113550
Selección de variables con el mejor R^2 ajustado
max_adjr<-which.max (reg.summary$adjr2)
max_adjr
## [1] 57
par(mfrow =c(2,2))
plot(reg.summary$rss ,xlab=" Número de Variables ",ylab=" RSS",
type="l")
plot(reg.summary$adjr2 ,xlab =" Número de Variables ",
ylab=" Adjusted RSq",type="l")
points (max_adjr, reg.summary$adjr2[max_adjr], col ="red",cex =2, pch =20)
plot(regfit.fwd ,scale ="adjr2")
Variables seleccionadas
coef(regfit.fwd ,max_adjr)
## (Intercept) Ano_Base SEMANA02
## 536.60433 16.93497 108.54653
## SEMANA03 SEMANA04 SEMANA05
## 220.46763 224.92818 257.17818
## SEMANA06 SEMANA07 SEMANA08
## 291.17818 308.67818 287.17818
## SEMANA09 SEMANA10 SEMANA11
## 281.92818 334.92818 314.67818
## SEMANA12 SEMANA13 SEMANA14
## 290.06301 286.50708 325.48411
## SEMANA15 SEMANA16 SEMANA17
## 317.98411 266.23411 332.21409
## SEMANA18 SEMANA19 SEMANA20
## 322.03945 308.21763 310.17818
## SEMANA21 SEMANA22 SEMANA23
## 299.96763 265.50708 313.00708
## SEMANA24 SEMANA25 SEMANA26
## 290.46763 245.50708 201.00708
## SEMANA27 SEMANA28 SEMANA29
## 263.33598 283.42818 311.28591
## SEMANA30 SEMANA31 SEMANA32
## 313.46763 336.17995 307.25531
## SEMANA33 SEMANA34 SEMANA35
## 318.71763 275.79653 313.42818
## SEMANA36 SEMANA37 SEMANA38
## 290.92818 340.67818 340.67818
## SEMANA39 SEMANA40 SEMANA41
## 278.42818 335.92818 235.67818
## SEMANA42 SEMANA43 SEMANA44
## 287.33598 292.42818 284.92818
## SEMANA45 SEMANA46 SEMANA47
## 246.58598 293.00708 282.00708
## SEMANA48 SEMANA49 SEMANA50
## 280.17818 299.00000 304.50354
## SEMANA51 SEMANA52 SEMANA53
## 342.46409 187.82536 -241.50000
## Feria_Flores_Semana Semana_Santa_Semana Feriados_Lunes
## 98.06828 -167.93643 -46.15780
## Feriados_Otros
## -44.14364
set.seed(123) # fija la semilla del generador de parámetros para que sea reproducible
Se realiza el modelo de regresión lineal con las variables seleccionadas y se revisa el p-valor de cada una para seleccionar las variables definitivas del modelo
library(caret)
## Warning: package 'caret' was built under R version 3.5.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.5.3
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_s = caret::train(TOTAL_ACCIDENTES∼Ano_Base+Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
method = "lm", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_lm_fit_s)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -228.461 -26.757 3.867 27.361 191.539
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 805.133 3.572 225.417 < 2e-16 ***
## Ano_Base 8.488 3.583 2.369 0.019107 *
## SEMANA02 14.873 5.317 2.797 0.005817 **
## SEMANA03 30.208 5.185 5.827 3.27e-08 ***
## SEMANA04 30.819 5.172 5.959 1.70e-08 ***
## SEMANA05 35.238 5.172 6.813 2.09e-10 ***
## SEMANA06 39.897 5.172 7.714 1.50e-12 ***
## SEMANA07 42.295 5.172 8.177 1.07e-13 ***
## SEMANA08 39.349 5.172 7.608 2.71e-12 ***
## SEMANA09 38.630 5.172 7.469 5.89e-12 ***
## SEMANA10 45.892 5.172 8.873 1.83e-15 ***
## SEMANA11 43.117 5.172 8.336 4.25e-14 ***
## SEMANA12 39.744 5.322 7.468 5.92e-12 ***
## SEMANA13 39.257 5.233 7.502 4.91e-12 ***
## SEMANA14 44.598 5.274 8.456 2.12e-14 ***
## SEMANA15 43.570 5.274 8.261 6.56e-14 ***
## SEMANA16 36.479 5.274 6.917 1.20e-10 ***
## SEMANA17 45.520 5.055 9.006 8.34e-16 ***
## SEMANA18 44.126 5.034 8.766 3.44e-15 ***
## SEMANA19 42.232 5.185 8.146 1.28e-13 ***
## SEMANA20 42.500 5.172 8.217 8.47e-14 ***
## SEMANA21 41.101 5.185 7.928 4.46e-13 ***
## SEMANA22 36.380 5.233 6.952 9.96e-11 ***
## SEMANA23 42.888 5.233 8.196 9.60e-14 ***
## SEMANA24 39.800 5.185 7.677 1.85e-12 ***
## SEMANA25 33.639 5.233 6.428 1.58e-09 ***
## SEMANA26 27.542 5.233 5.263 4.75e-07 ***
## SEMANA27 36.082 5.434 6.641 5.21e-10 ***
## SEMANA28 38.835 5.172 7.509 4.72e-12 ***
## SEMANA29 42.652 5.055 8.438 2.35e-14 ***
## SEMANA30 42.951 5.185 8.284 5.75e-14 ***
## SEMANA31 46.063 5.827 7.906 5.05e-13 ***
## SEMANA32 42.100 5.718 7.363 1.06e-11 ***
## SEMANA33 43.670 5.185 8.423 2.57e-14 ***
## SEMANA34 37.789 5.317 7.108 4.29e-11 ***
## SEMANA35 42.946 5.172 8.303 5.15e-14 ***
## SEMANA36 39.863 5.172 7.707 1.55e-12 ***
## SEMANA37 46.679 5.172 9.025 7.42e-16 ***
## SEMANA38 46.679 5.172 9.025 7.42e-16 ***
## SEMANA39 38.150 5.172 7.376 9.85e-12 ***
## SEMANA40 46.029 5.172 8.899 1.57e-15 ***
## SEMANA41 32.292 5.172 6.244 4.07e-09 ***
## SEMANA42 39.370 5.434 7.246 2.02e-11 ***
## SEMANA43 40.068 5.172 7.747 1.24e-12 ***
## SEMANA44 39.041 5.172 7.548 3.78e-12 ***
## SEMANA45 33.787 5.434 6.218 4.63e-09 ***
## SEMANA46 40.148 5.233 7.672 1.89e-12 ***
## SEMANA47 38.640 5.233 7.384 9.43e-12 ***
## SEMANA48 38.390 5.172 7.422 7.62e-12 ***
## SEMANA49 40.969 5.015 8.169 1.12e-13 ***
## SEMANA50 41.723 5.070 8.229 7.92e-14 ***
## SEMANA51 46.924 5.055 9.283 < 2e-16 ***
## SEMANA52 25.736 5.076 5.070 1.14e-06 ***
## SEMANA53 -23.512 4.364 -5.388 2.67e-07 ***
## Feria_Flores_Semana 13.437 5.372 2.502 0.013424 *
## Semana_Santa_Semana -23.010 6.530 -3.524 0.000562 ***
## Feriados_Lunes -18.830 5.163 -3.647 0.000364 ***
## Feriados_Otros -17.145 7.176 -2.389 0.018113 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.76 on 152 degrees of freedom
## Multiple R-squared: 0.8114, Adjusted R-squared: 0.7406
## F-statistic: 11.47 on 57 and 152 DF, p-value: < 2.2e-16
head(Train_S_Dataset)
## ANO SEMANA ACCIDENTES_GRAVES ACCIDENTES_LEVES TOTAL_ACCIDENTES Ano_Sem
## 1 2014 01 399 285 684 2014_01
## 2 2014 02 318 254 572 2014_02
## 3 2014 03 385 326 711 2014_03
## 4 2014 04 377 350 727 2014_04
## 5 2014 05 420 371 791 2014_05
## 6 2014 06 432 369 801 2014_06
## Ano_Base Feria_Flores_Semana Semana_Santa_Semana Feriados_Lunes
## 1 0 0 0 0
## 2 0 0 0 1
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Feriados_Otros
## 1 1
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
library(caret)
trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_s = caret::train(TOTAL_ACCIDENTES∼Ano_Base+Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
method = "lm", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_lm_fit_s)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -228.461 -26.757 3.867 27.361 191.539
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 805.133 3.572 225.417 < 2e-16 ***
## Ano_Base 8.488 3.583 2.369 0.019107 *
## SEMANA02 14.873 5.317 2.797 0.005817 **
## SEMANA03 30.208 5.185 5.827 3.27e-08 ***
## SEMANA04 30.819 5.172 5.959 1.70e-08 ***
## SEMANA05 35.238 5.172 6.813 2.09e-10 ***
## SEMANA06 39.897 5.172 7.714 1.50e-12 ***
## SEMANA07 42.295 5.172 8.177 1.07e-13 ***
## SEMANA08 39.349 5.172 7.608 2.71e-12 ***
## SEMANA09 38.630 5.172 7.469 5.89e-12 ***
## SEMANA10 45.892 5.172 8.873 1.83e-15 ***
## SEMANA11 43.117 5.172 8.336 4.25e-14 ***
## SEMANA12 39.744 5.322 7.468 5.92e-12 ***
## SEMANA13 39.257 5.233 7.502 4.91e-12 ***
## SEMANA14 44.598 5.274 8.456 2.12e-14 ***
## SEMANA15 43.570 5.274 8.261 6.56e-14 ***
## SEMANA16 36.479 5.274 6.917 1.20e-10 ***
## SEMANA17 45.520 5.055 9.006 8.34e-16 ***
## SEMANA18 44.126 5.034 8.766 3.44e-15 ***
## SEMANA19 42.232 5.185 8.146 1.28e-13 ***
## SEMANA20 42.500 5.172 8.217 8.47e-14 ***
## SEMANA21 41.101 5.185 7.928 4.46e-13 ***
## SEMANA22 36.380 5.233 6.952 9.96e-11 ***
## SEMANA23 42.888 5.233 8.196 9.60e-14 ***
## SEMANA24 39.800 5.185 7.677 1.85e-12 ***
## SEMANA25 33.639 5.233 6.428 1.58e-09 ***
## SEMANA26 27.542 5.233 5.263 4.75e-07 ***
## SEMANA27 36.082 5.434 6.641 5.21e-10 ***
## SEMANA28 38.835 5.172 7.509 4.72e-12 ***
## SEMANA29 42.652 5.055 8.438 2.35e-14 ***
## SEMANA30 42.951 5.185 8.284 5.75e-14 ***
## SEMANA31 46.063 5.827 7.906 5.05e-13 ***
## SEMANA32 42.100 5.718 7.363 1.06e-11 ***
## SEMANA33 43.670 5.185 8.423 2.57e-14 ***
## SEMANA34 37.789 5.317 7.108 4.29e-11 ***
## SEMANA35 42.946 5.172 8.303 5.15e-14 ***
## SEMANA36 39.863 5.172 7.707 1.55e-12 ***
## SEMANA37 46.679 5.172 9.025 7.42e-16 ***
## SEMANA38 46.679 5.172 9.025 7.42e-16 ***
## SEMANA39 38.150 5.172 7.376 9.85e-12 ***
## SEMANA40 46.029 5.172 8.899 1.57e-15 ***
## SEMANA41 32.292 5.172 6.244 4.07e-09 ***
## SEMANA42 39.370 5.434 7.246 2.02e-11 ***
## SEMANA43 40.068 5.172 7.747 1.24e-12 ***
## SEMANA44 39.041 5.172 7.548 3.78e-12 ***
## SEMANA45 33.787 5.434 6.218 4.63e-09 ***
## SEMANA46 40.148 5.233 7.672 1.89e-12 ***
## SEMANA47 38.640 5.233 7.384 9.43e-12 ***
## SEMANA48 38.390 5.172 7.422 7.62e-12 ***
## SEMANA49 40.969 5.015 8.169 1.12e-13 ***
## SEMANA50 41.723 5.070 8.229 7.92e-14 ***
## SEMANA51 46.924 5.055 9.283 < 2e-16 ***
## SEMANA52 25.736 5.076 5.070 1.14e-06 ***
## SEMANA53 -23.512 4.364 -5.388 2.67e-07 ***
## Feria_Flores_Semana 13.437 5.372 2.502 0.013424 *
## Semana_Santa_Semana -23.010 6.530 -3.524 0.000562 ***
## Feriados_Lunes -18.830 5.163 -3.647 0.000364 ***
## Feriados_Otros -17.145 7.176 -2.389 0.018113 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.76 on 152 degrees of freedom
## Multiple R-squared: 0.8114, Adjusted R-squared: 0.7406
## F-statistic: 11.47 on 57 and 152 DF, p-value: < 2.2e-16
caret_lm_fit_s
## Linear Regression
##
## 210 samples
## 6 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 189, 189, 188, 189, 190, 189, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 62.60188 0.5837995 46.87437
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_lm_s<-predict(caret_lm_fit_s,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_lm_s<-mean((Train_S_Dataset$TOTAL_ACCIDENTES-y_tr_pred_lm_s)^2) # calcula el mse de entrenamiento
RMSE_tr_lm_s = sqrt(mse_tr_lm_s)
mse_tr_lm_s
## [1] 1939.128
RMSE_tr_lm_s
## [1] 44.03553
Calculo MSE y RMSE para los datos de validación
y_test_pred_lm_s<-predict(caret_lm_fit_s,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_lm_s<-mean((Test_S_Dataset$TOTAL_ACCIDENTES-y_test_pred_lm_s)^2) # calcula el mse de entrenamiento
RMSE_test_lm_s = sqrt(mse_test_lm_s)
mse_test_lm_s
## [1] 4053.676
RMSE_test_lm_s
## [1] 63.66849
Predicción en la muestra
library(plotly)
## Warning: package 'plotly' was built under R version 3.5.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:reshape':
##
## rename
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_lm_s,
name='Modelo lm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_lm_s,
name='Modelo lm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_s_m = caret::train(ACCIDENTES_GRAVES∼Ano_Base+Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
method = "lm", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_lm_fit_s_m)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -122.630 -20.549 3.016 22.611 88.370
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 448.462 2.433 184.298 < 2e-16 ***
## Ano_Base 2.305 2.441 0.944 0.346661
## SEMANA02 4.765 3.622 1.316 0.190261
## SEMANA03 14.633 3.532 4.143 5.67e-05 ***
## SEMANA04 12.219 3.524 3.468 0.000683 ***
## SEMANA05 16.946 3.524 4.809 3.61e-06 ***
## SEMANA06 20.612 3.524 5.850 2.92e-08 ***
## SEMANA07 17.837 3.524 5.062 1.18e-06 ***
## SEMANA08 19.481 3.524 5.529 1.37e-07 ***
## SEMANA09 17.940 3.524 5.091 1.04e-06 ***
## SEMANA10 21.982 3.524 6.238 4.18e-09 ***
## SEMANA11 21.674 3.524 6.151 6.51e-09 ***
## SEMANA12 19.114 3.626 5.272 4.56e-07 ***
## SEMANA13 19.136 3.565 5.368 2.93e-07 ***
## SEMANA14 21.308 3.593 5.931 1.96e-08 ***
## SEMANA15 18.979 3.593 5.282 4.35e-07 ***
## SEMANA16 18.088 3.593 5.034 1.34e-06 ***
## SEMANA17 20.034 3.444 5.818 3.41e-08 ***
## SEMANA18 18.651 3.429 5.439 2.10e-07 ***
## SEMANA19 19.120 3.532 5.413 2.36e-07 ***
## SEMANA20 19.070 3.524 5.412 2.38e-07 ***
## SEMANA21 19.155 3.532 5.423 2.26e-07 ***
## SEMANA22 17.013 3.565 4.772 4.25e-06 ***
## SEMANA23 21.260 3.565 5.963 1.66e-08 ***
## SEMANA24 18.641 3.532 5.278 4.44e-07 ***
## SEMANA25 15.266 3.565 4.282 3.27e-05 ***
## SEMANA26 10.847 3.565 3.042 0.002765 **
## SEMANA27 16.873 3.702 4.558 1.05e-05 ***
## SEMANA28 18.419 3.524 5.227 5.59e-07 ***
## SEMANA29 19.564 3.444 5.681 6.61e-08 ***
## SEMANA30 19.669 3.532 5.568 1.14e-07 ***
## SEMANA31 21.456 3.970 5.405 2.45e-07 ***
## SEMANA32 18.155 3.895 4.660 6.85e-06 ***
## SEMANA33 20.799 3.532 5.888 2.41e-08 ***
## SEMANA34 18.776 3.622 5.184 6.83e-07 ***
## SEMANA35 23.455 3.524 6.656 4.80e-10 ***
## SEMANA36 18.145 3.524 5.150 7.97e-07 ***
## SEMANA37 22.770 3.524 6.462 1.33e-09 ***
## SEMANA38 21.845 3.524 6.199 5.09e-09 ***
## SEMANA39 19.104 3.524 5.422 2.27e-07 ***
## SEMANA40 20.646 3.524 5.859 2.78e-08 ***
## SEMANA41 12.562 3.524 3.565 0.000487 ***
## SEMANA42 18.449 3.702 4.984 1.68e-06 ***
## SEMANA43 18.180 3.524 5.159 7.63e-07 ***
## SEMANA44 16.227 3.524 4.605 8.65e-06 ***
## SEMANA45 15.161 3.702 4.096 6.82e-05 ***
## SEMANA46 16.019 3.565 4.493 1.38e-05 ***
## SEMANA47 16.019 3.565 4.493 1.38e-05 ***
## SEMANA48 14.994 3.524 4.255 3.64e-05 ***
## SEMANA49 14.730 3.416 4.311 2.91e-05 ***
## SEMANA50 15.392 3.454 4.456 1.61e-05 ***
## SEMANA51 18.356 3.444 5.330 3.48e-07 ***
## SEMANA52 12.044 3.458 3.483 0.000648 ***
## SEMANA53 -15.820 2.973 -5.321 3.63e-07 ***
## Feria_Flores_Semana 7.089 3.660 1.937 0.054571 .
## Semana_Santa_Semana -13.709 4.449 -3.082 0.002445 **
## Feriados_Lunes -7.126 3.518 -2.026 0.044534 *
## Feriados_Otros -6.658 4.889 -1.362 0.175272
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 35.26 on 152 degrees of freedom
## Multiple R-squared: 0.7116, Adjusted R-squared: 0.6034
## F-statistic: 6.579 on 57 and 152 DF, p-value: < 2.2e-16
caret_lm_fit_s_m
## Linear Regression
##
## 210 samples
## 6 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 187, 189, 188, 190, 189, 189, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 41.22433 0.4038684 33.51642
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_lm_s_m<-predict(caret_lm_fit_s_m,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_lm_s_m<-mean((Train_S_Dataset$ACCIDENTES_GRAVES-y_tr_pred_lm_s_m)^2) # calcula el mse de entrenamiento
RMSE_tr_lm_s_m = sqrt(mse_tr_lm_s_m)
mse_tr_lm_s_m
## [1] 900.0193
RMSE_tr_lm_s_m
## [1] 30.00032
Calculo MSE y RMSE para los datos de validación
y_test_pred_lm_s_m<-predict(caret_lm_fit_s_m,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_lm_s_m<-mean((Test_S_Dataset$ACCIDENTES_GRAVES-y_test_pred_lm_s_m)^2) # calcula el mse de entrenamiento
RMSE_test_lm_s_m = sqrt(mse_test_lm_s_m)
mse_test_lm_s_m
## [1] 3135.736
RMSE_test_lm_s_m
## [1] 55.99764
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_lm_s_m,
name='Modelo lm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
split = ~ANO,
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_lm_s_m,
name='Modelo lm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_s_sd = caret::train(ACCIDENTES_LEVES∼Ano_Base+Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
method = "lm", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_lm_fit_s_sd)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -105.831 -15.978 0.582 17.520 103.169
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 356.671 2.090 170.673 < 2e-16 ***
## Ano_Base 6.183 2.097 2.949 0.003689 **
## SEMANA02 10.107 3.111 3.249 0.001424 **
## SEMANA03 15.575 3.033 5.134 8.54e-07 ***
## SEMANA04 18.600 3.026 6.147 6.66e-09 ***
## SEMANA05 18.292 3.026 6.045 1.11e-08 ***
## SEMANA06 19.285 3.026 6.373 2.10e-09 ***
## SEMANA07 24.458 3.026 8.082 1.84e-13 ***
## SEMANA08 19.868 3.026 6.565 7.74e-10 ***
## SEMANA09 20.690 3.026 6.837 1.84e-10 ***
## SEMANA10 23.910 3.026 7.901 5.18e-13 ***
## SEMANA11 21.443 3.026 7.086 4.82e-11 ***
## SEMANA12 20.630 3.114 6.625 5.65e-10 ***
## SEMANA13 20.120 3.062 6.571 7.50e-10 ***
## SEMANA14 23.289 3.086 7.548 3.80e-12 ***
## SEMANA15 24.591 3.086 7.969 3.51e-13 ***
## SEMANA16 18.391 3.086 5.960 1.69e-08 ***
## SEMANA17 25.485 2.957 8.618 8.25e-15 ***
## SEMANA18 25.475 2.945 8.650 6.81e-15 ***
## SEMANA19 23.111 3.033 7.619 2.55e-12 ***
## SEMANA20 23.430 3.026 7.743 1.27e-12 ***
## SEMANA21 21.947 3.033 7.235 2.14e-11 ***
## SEMANA22 19.367 3.062 6.325 2.68e-09 ***
## SEMANA23 21.628 3.062 7.064 5.45e-11 ***
## SEMANA24 21.159 3.033 6.975 8.79e-11 ***
## SEMANA25 18.373 3.062 6.001 1.38e-08 ***
## SEMANA26 16.695 3.062 5.453 1.97e-07 ***
## SEMANA27 19.209 3.179 6.042 1.12e-08 ***
## SEMANA28 20.416 3.026 6.746 2.98e-10 ***
## SEMANA29 23.088 2.957 7.807 8.85e-13 ***
## SEMANA30 23.283 3.033 7.675 1.86e-12 ***
## SEMANA31 24.607 3.409 7.218 2.35e-11 ***
## SEMANA32 23.945 3.345 7.158 3.27e-11 ***
## SEMANA33 22.871 3.033 7.540 3.97e-12 ***
## SEMANA34 19.014 3.111 6.112 7.90e-09 ***
## SEMANA35 19.491 3.026 6.441 1.48e-09 ***
## SEMANA36 21.717 3.026 7.177 2.95e-11 ***
## SEMANA37 23.910 3.026 7.901 5.18e-13 ***
## SEMANA38 24.835 3.026 8.207 9.00e-14 ***
## SEMANA39 19.046 3.026 6.294 3.15e-09 ***
## SEMANA40 25.383 3.026 8.388 3.15e-14 ***
## SEMANA41 19.731 3.026 6.520 9.80e-10 ***
## SEMANA42 20.921 3.179 6.581 7.13e-10 ***
## SEMANA43 21.889 3.026 7.233 2.16e-11 ***
## SEMANA44 22.814 3.026 7.539 3.99e-12 ***
## SEMANA45 18.626 3.179 5.859 2.79e-08 ***
## SEMANA46 24.128 3.062 7.880 5.83e-13 ***
## SEMANA47 22.621 3.062 7.388 9.21e-12 ***
## SEMANA48 23.396 3.026 7.731 1.36e-12 ***
## SEMANA49 26.239 2.934 8.943 1.21e-15 ***
## SEMANA50 26.331 2.967 8.876 1.80e-15 ***
## SEMANA51 28.568 2.957 9.660 < 2e-16 ***
## SEMANA52 13.691 2.970 4.610 8.48e-06 ***
## SEMANA53 -7.691 2.553 -3.012 0.003039 **
## Feria_Flores_Semana 6.348 3.143 2.020 0.045160 *
## Semana_Santa_Semana -9.301 3.821 -2.434 0.016075 *
## Feriados_Lunes -11.704 3.021 -3.874 0.000159 ***
## Feriados_Otros -10.487 4.199 -2.498 0.013564 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30.28 on 152 degrees of freedom
## Multiple R-squared: 0.7706, Adjusted R-squared: 0.6846
## F-statistic: 8.96 on 57 and 152 DF, p-value: < 2.2e-16
caret_lm_fit_s_sd
## Linear Regression
##
## 210 samples
## 6 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 187, 190, 189, 189, 188, 189, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 37.4432 0.5530664 28.47099
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_lm_s_sd<-predict(caret_lm_fit_s_sd,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_lm_s_sd<-mean((Train_S_Dataset$ACCIDENTES_LEVES-y_tr_pred_lm_s_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_lm_s_sd = sqrt(mse_tr_lm_s_sd)
mse_tr_lm_s_sd
## [1] 663.8167
RMSE_tr_lm_s_sd
## [1] 25.76464
Calculo MSE y RMSE para los datos de validación
y_test_pred_lm_s_sd<-predict(caret_lm_fit_s_sd,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_lm_s_sd<-mean((Test_S_Dataset$ACCIDENTES_LEVES-y_test_pred_lm_s_sd)^2) # calcula el mse de entrenamiento
RMSE_test_lm_s_sd = sqrt(mse_test_lm_s_sd)
mse_test_lm_s_sd
## [1] 1110.178
RMSE_test_lm_s_sd
## [1] 33.31933
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_lm_s_sd,
name='Modelo lm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 0.75, y = 0.9))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_lm_s_sd,
name='Modelo lm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 0.75, y = 0.9))
Tipo_de_accidentes= c("Total Accidentes","Accidentes graves","Accidentes leves")
RMSE_Train_lm = round(c(RMSE_tr_lm_s,RMSE_tr_lm_s_m,RMSE_tr_lm_s_sd), 3)
RMSE_Test_lm = round(c(RMSE_test_lm_s,RMSE_test_lm_s_m,RMSE_test_lm_s_sd),3)
Tabla_lm = data.frame (cbind(Tipo_de_accidentes,RMSE_Train_lm,RMSE_Test_lm))
Tabla_lm
## Tipo_de_accidentes RMSE_Train_lm RMSE_Test_lm
## 1 Total Accidentes 44.036 63.668
## 2 Accidentes graves 30 55.998
## 3 Accidentes leves 25.765 33.319
head(Train_S_Dataset)
## ANO SEMANA ACCIDENTES_GRAVES ACCIDENTES_LEVES TOTAL_ACCIDENTES Ano_Sem
## 1 2014 01 399 285 684 2014_01
## 2 2014 02 318 254 572 2014_02
## 3 2014 03 385 326 711 2014_03
## 4 2014 04 377 350 727 2014_04
## 5 2014 05 420 371 791 2014_05
## 6 2014 06 432 369 801 2014_06
## Ano_Base Feria_Flores_Semana Semana_Santa_Semana Feriados_Lunes
## 1 0 0 0 0
## 2 0 0 0 1
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Feriados_Otros
## 1 1
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
library(caret)
trcntrl = trainControl(method="cv", number=10)
caret_knn_fit_s = caret::train(TOTAL_ACCIDENTES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
method = "knn", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_knn_fit_s)
## Length Class Mode
## learn 2 -none- list
## k 1 -none- numeric
## theDots 0 -none- list
## xNames 57 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 0 -none- list
caret_knn_fit_s
## k-Nearest Neighbors
##
## 210 samples
## 6 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 189, 189, 189, 189, 189, 189, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 159.87102 0.1188231 141.52602
## 7 132.51903 0.1583542 107.30246
## 9 87.62084 0.2976933 62.50242
## 11 88.67186 0.2735668 62.98123
## 13 89.06752 0.2630701 63.05815
## 15 88.76619 0.2532501 62.54331
## 17 87.75910 0.2655133 61.60049
## 19 89.37957 0.2236102 61.68659
## 21 88.91507 0.2333045 60.61590
## 23 90.36976 0.1946510 61.38025
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 9.
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_knn_s<-predict(caret_knn_fit_s,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_knn_s<-mean((Train_S_Dataset$TOTAL_ACCIDENTES-y_tr_pred_knn_s)^2) # calcula el mse de entrenamiento
RMSE_tr_knn_s = sqrt(mse_tr_knn_s)
mse_tr_knn_s
## [1] 7689.113
RMSE_tr_knn_s
## [1] 87.68759
Calculo MSE y RMSE para los datos de validación
y_test_pred_knn_s<-predict(caret_knn_fit_s,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_knn_s<-mean((Test_S_Dataset$TOTAL_ACCIDENTES-y_test_pred_knn_s)^2) # calcula el mse de entrenamiento
RMSE_test_knn_s = sqrt(mse_test_knn_s)
mse_test_knn_s
## [1] 2811.723
RMSE_test_knn_s
## [1] 53.02568
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_knn_s,
name='Modelo knn',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_knn_s,
name='Modelo knn',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
trcntrl = trainControl(method="cv", number=10)
caret_knn_fit_s_m = caret::train(ACCIDENTES_GRAVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
method = "knn", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_knn_fit_s_m)
## Length Class Mode
## learn 2 -none- list
## k 1 -none- numeric
## theDots 0 -none- list
## xNames 57 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 0 -none- list
caret_knn_fit_s_m
## k-Nearest Neighbors
##
## 210 samples
## 6 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 190, 189, 189, 189, 190, 188, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 73.35329 0.1595947 62.60777
## 7 75.04626 0.1755304 61.51550
## 9 53.01555 0.1578638 37.65618
## 11 52.23325 0.1804905 37.39030
## 13 51.74448 0.2025601 37.32592
## 15 51.96793 0.1817838 37.34017
## 17 52.20766 0.1573873 37.14202
## 19 51.82887 0.1802767 36.90829
## 21 52.39492 0.1498191 37.17210
## 23 52.56264 0.1543004 37.28509
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 13.
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_knn_s_m<-predict(caret_knn_fit_s_m,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_knn_s_m<-mean((Train_S_Dataset$ACCIDENTES_GRAVES-y_tr_pred_knn_s_m)^2) # calcula el mse de entrenamiento
RMSE_tr_knn_s_m = sqrt(mse_tr_knn_s_m)
mse_tr_knn_s_m
## [1] 2548.135
RMSE_tr_knn_s_m
## [1] 50.47905
Calculo MSE y RMSE para los datos de validación
y_test_pred_knn_s_m<-predict(caret_knn_fit_s_m,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_knn_s_m<-mean((Test_S_Dataset$ACCIDENTES_GRAVES-y_test_pred_knn_s_m)^2) # calcula el mse de entrenamiento
RMSE_test_knn_s_m = sqrt(mse_test_knn_s_m)
mse_test_knn_s_m
## [1] 1990.944
RMSE_test_knn_s_m
## [1] 44.61999
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_knn_s_m,
name='Modelo knn',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_knn_s_m,
name='Modelo knn',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
trcntrl = trainControl(method="cv", number=10)
caret_knn_fit_s_sd = caret::train(ACCIDENTES_LEVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
method = "knn", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_knn_fit_s_sd)
## Length Class Mode
## learn 2 -none- list
## k 1 -none- numeric
## theDots 0 -none- list
## xNames 57 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 0 -none- list
caret_knn_fit_s_sd
## k-Nearest Neighbors
##
## 210 samples
## 6 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 189, 189, 189, 189, 190, 189, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 85.05207 0.1484905 75.41969
## 7 76.04815 0.1885851 62.96637
## 9 48.81370 0.2628027 36.43669
## 11 47.95157 0.2475786 35.85214
## 13 47.91406 0.2392017 35.38373
## 15 48.16050 0.2135900 35.48826
## 17 47.92511 0.2208918 35.42153
## 19 47.34845 0.2220739 35.00565
## 21 47.32389 0.2199801 35.13971
## 23 47.43791 0.2101234 35.07661
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 21.
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_knn_s_sd<-predict(caret_knn_fit_s_sd,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_knn_s_sd<-mean((Train_S_Dataset$ACCIDENTES_LEVES-y_tr_pred_knn_s_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_knn_s_sd = sqrt(mse_tr_knn_s_sd)
mse_tr_knn_s_sd
## [1] 2514.099
RMSE_tr_knn_s_sd
## [1] 50.14079
Calculo MSE y RMSE para los datos de validación
y_test_pred_knn_s_sd<-predict(caret_knn_fit_s_sd,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_knn_s_sd<-mean((Test_S_Dataset$ACCIDENTES_LEVES-y_test_pred_knn_s_sd)^2) # calcula el mse de entrenamiento
RMSE_test_knn_s_sd = sqrt(mse_test_knn_s_sd)
mse_test_knn_s_sd
## [1] 1705.109
RMSE_test_knn_s_sd
## [1] 41.29297
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_knn_s_sd,
name='Modelo knn',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_knn_s_sd,
name='Modelo knn',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Tipo_de_accidentes= c("Total Accidentes","Accidentes graves","Accidentes leves")
RMSE_Train_knn = round(c(RMSE_tr_knn_s,RMSE_tr_knn_s_m,RMSE_tr_knn_s_sd), 3)
RMSE_Test_knn = round(c(RMSE_test_knn_s,RMSE_test_knn_s_m,RMSE_test_knn_s_sd),3)
Tabla_knn = data.frame (cbind(Tipo_de_accidentes,RMSE_Train_knn,RMSE_Test_knn))
Tabla_knn
## Tipo_de_accidentes RMSE_Train_knn RMSE_Test_knn
## 1 Total Accidentes 87.688 53.026
## 2 Accidentes graves 50.479 44.62
## 3 Accidentes leves 50.141 41.293
glm_fit_s<-glm(TOTAL_ACCIDENTES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset, family = "poisson")
summary(glm_fit_s)
##
## Call:
## glm(formula = TOTAL_ACCIDENTES ~ Ano_Base + SEMANA + Feria_Flores_Semana +
## Semana_Santa_Semana + Feriados_Lunes + Feriados_Otros, family = "poisson",
## data = Train_S_Dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.6985 -0.9555 0.1415 0.9726 7.6856
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.277825 0.022888 274.290 < 2e-16 ***
## Ano_Base 0.020785 0.004869 4.269 1.97e-05 ***
## SEMANA02 0.183999 0.030950 5.945 2.76e-09 ***
## SEMANA03 0.351425 0.029174 12.046 < 2e-16 ***
## SEMANA04 0.358119 0.029008 12.345 < 2e-16 ***
## SEMANA05 0.399148 0.028782 13.868 < 2e-16 ***
## SEMANA06 0.440656 0.028561 15.428 < 2e-16 ***
## SEMANA07 0.461367 0.028454 16.215 < 2e-16 ***
## SEMANA08 0.435861 0.028586 15.247 < 2e-16 ***
## SEMANA09 0.429533 0.028620 15.008 < 2e-16 ***
## SEMANA10 0.491650 0.028300 17.373 < 2e-16 ***
## SEMANA11 0.468370 0.028418 16.482 < 2e-16 ***
## SEMANA12 0.436869 0.029664 14.727 < 2e-16 ***
## SEMANA13 0.435725 0.028991 15.030 < 2e-16 ***
## SEMANA14 0.483469 0.029074 16.629 < 2e-16 ***
## SEMANA15 0.475855 0.029142 16.329 < 2e-16 ***
## SEMANA16 0.407165 0.029494 13.805 < 2e-16 ***
## SEMANA17 0.490445 0.027860 17.604 < 2e-16 ***
## SEMANA18 0.481088 0.027977 17.196 < 2e-16 ***
## SEMANA19 0.461818 0.028578 16.160 < 2e-16 ***
## SEMANA20 0.463122 0.028445 16.281 < 2e-16 ***
## SEMANA21 0.451666 0.028626 15.778 < 2e-16 ***
## SEMANA22 0.410022 0.029146 14.068 < 2e-16 ***
## SEMANA23 0.468283 0.028828 16.244 < 2e-16 ***
## SEMANA24 0.440203 0.028687 15.345 < 2e-16 ***
## SEMANA25 0.383990 0.029281 13.114 < 2e-16 ***
## SEMANA26 0.324261 0.029621 10.947 < 2e-16 ***
## SEMANA27 0.407123 0.030244 13.461 < 2e-16 ***
## SEMANA28 0.431345 0.028610 15.077 < 2e-16 ***
## SEMANA29 0.468959 0.028163 16.651 < 2e-16 ***
## SEMANA30 0.467733 0.028543 16.387 < 2e-16 ***
## SEMANA31 0.491210 0.031121 15.784 < 2e-16 ***
## SEMANA32 0.463948 0.030715 15.105 < 2e-16 ***
## SEMANA33 0.474215 0.028514 16.631 < 2e-16 ***
## SEMANA34 0.422856 0.029519 14.325 < 2e-16 ***
## SEMANA35 0.466915 0.028425 16.426 < 2e-16 ***
## SEMANA36 0.440357 0.028563 15.417 < 2e-16 ***
## SEMANA37 0.498163 0.028267 17.623 < 2e-16 ***
## SEMANA38 0.498163 0.028267 17.623 < 2e-16 ***
## SEMANA39 0.425292 0.028642 14.848 < 2e-16 ***
## SEMANA40 0.492786 0.028294 17.416 < 2e-16 ***
## SEMANA41 0.371983 0.028931 12.858 < 2e-16 ***
## SEMANA42 0.438123 0.030078 14.566 < 2e-16 ***
## SEMANA43 0.442149 0.028553 15.485 < 2e-16 ***
## SEMANA44 0.433154 0.028601 15.145 < 2e-16 ***
## SEMANA45 0.384904 0.030365 12.676 < 2e-16 ***
## SEMANA46 0.444347 0.028962 15.342 < 2e-16 ***
## SEMANA47 0.430144 0.029021 14.822 < 2e-16 ***
## SEMANA48 0.427414 0.028631 14.928 < 2e-16 ***
## SEMANA49 0.452770 0.027968 16.189 < 2e-16 ***
## SEMANA50 0.458286 0.028171 16.268 < 2e-16 ***
## SEMANA51 0.502208 0.027800 18.065 < 2e-16 ***
## SEMANA52 0.303926 0.029276 10.381 < 2e-16 ***
## SEMANA53 -0.618838 0.047479 -13.034 < 2e-16 ***
## Feria_Flores_Semana 0.114436 0.025566 4.476 7.60e-06 ***
## Semana_Santa_Semana -0.249251 0.034801 -7.162 7.94e-13 ***
## Feriados_Lunes -0.059120 0.008736 -6.767 1.31e-11 ***
## Feriados_Otros -0.058858 0.012967 -4.539 5.65e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3138.71 on 209 degrees of freedom
## Residual deviance: 636.17 on 152 degrees of freedom
## AIC: 2540.8
##
## Number of Fisher Scoring iterations: 4
glm_fit_s
##
## Call: glm(formula = TOTAL_ACCIDENTES ~ Ano_Base + SEMANA + Feria_Flores_Semana +
## Semana_Santa_Semana + Feriados_Lunes + Feriados_Otros, family = "poisson",
## data = Train_S_Dataset)
##
## Coefficients:
## (Intercept) Ano_Base SEMANA02
## 6.27783 0.02079 0.18400
## SEMANA03 SEMANA04 SEMANA05
## 0.35142 0.35812 0.39915
## SEMANA06 SEMANA07 SEMANA08
## 0.44066 0.46137 0.43586
## SEMANA09 SEMANA10 SEMANA11
## 0.42953 0.49165 0.46837
## SEMANA12 SEMANA13 SEMANA14
## 0.43687 0.43573 0.48347
## SEMANA15 SEMANA16 SEMANA17
## 0.47585 0.40716 0.49045
## SEMANA18 SEMANA19 SEMANA20
## 0.48109 0.46182 0.46312
## SEMANA21 SEMANA22 SEMANA23
## 0.45167 0.41002 0.46828
## SEMANA24 SEMANA25 SEMANA26
## 0.44020 0.38399 0.32426
## SEMANA27 SEMANA28 SEMANA29
## 0.40712 0.43134 0.46896
## SEMANA30 SEMANA31 SEMANA32
## 0.46773 0.49121 0.46395
## SEMANA33 SEMANA34 SEMANA35
## 0.47421 0.42286 0.46691
## SEMANA36 SEMANA37 SEMANA38
## 0.44036 0.49816 0.49816
## SEMANA39 SEMANA40 SEMANA41
## 0.42529 0.49279 0.37198
## SEMANA42 SEMANA43 SEMANA44
## 0.43812 0.44215 0.43315
## SEMANA45 SEMANA46 SEMANA47
## 0.38490 0.44435 0.43014
## SEMANA48 SEMANA49 SEMANA50
## 0.42741 0.45277 0.45829
## SEMANA51 SEMANA52 SEMANA53
## 0.50221 0.30393 -0.61884
## Feria_Flores_Semana Semana_Santa_Semana Feriados_Lunes
## 0.11444 -0.24925 -0.05912
## Feriados_Otros
## -0.05886
##
## Degrees of Freedom: 209 Total (i.e. Null); 152 Residual
## Null Deviance: 3139
## Residual Deviance: 636.2 AIC: 2541
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_glm_s<-predict(glm_fit_s,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")],type="response")
mse_tr_glm_s<-mean((Train_S_Dataset$TOTAL_ACCIDENTES-y_tr_pred_glm_s)^2) # calcula el mse de entrenamiento
RMSE_tr_glm_s = sqrt(mse_tr_glm_s)
mse_tr_glm_s
## [1] 1940.939
RMSE_tr_glm_s
## [1] 44.05609
Calculo MSE y RMSE para los datos de validación
y_test_pred_glm_s<-predict(glm_fit_s,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")],type="response")
mse_test_glm_s<-mean((Train_S_Dataset$TOTAL_ACCIDENTES-y_test_pred_glm_s)^2) # calcula el mse de entrenamiento
## Warning in Train_S_Dataset$TOTAL_ACCIDENTES - y_test_pred_glm_s: longitud
## de objeto mayor no es múltiplo de la longitud de uno menor
RMSE_test_glm_s = sqrt(mse_test_glm_s)
mse_test_glm_s
## [1] 7321.709
RMSE_test_glm_s
## [1] 85.56698
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_glm_s,
name='Modelo glm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_glm_s,
name='Modelo glm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
glm_fit_s_m<-glm(ACCIDENTES_GRAVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset, family = "poisson")
summary(glm_fit_s)
##
## Call:
## glm(formula = TOTAL_ACCIDENTES ~ Ano_Base + SEMANA + Feria_Flores_Semana +
## Semana_Santa_Semana + Feriados_Lunes + Feriados_Otros, family = "poisson",
## data = Train_S_Dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.6985 -0.9555 0.1415 0.9726 7.6856
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.277825 0.022888 274.290 < 2e-16 ***
## Ano_Base 0.020785 0.004869 4.269 1.97e-05 ***
## SEMANA02 0.183999 0.030950 5.945 2.76e-09 ***
## SEMANA03 0.351425 0.029174 12.046 < 2e-16 ***
## SEMANA04 0.358119 0.029008 12.345 < 2e-16 ***
## SEMANA05 0.399148 0.028782 13.868 < 2e-16 ***
## SEMANA06 0.440656 0.028561 15.428 < 2e-16 ***
## SEMANA07 0.461367 0.028454 16.215 < 2e-16 ***
## SEMANA08 0.435861 0.028586 15.247 < 2e-16 ***
## SEMANA09 0.429533 0.028620 15.008 < 2e-16 ***
## SEMANA10 0.491650 0.028300 17.373 < 2e-16 ***
## SEMANA11 0.468370 0.028418 16.482 < 2e-16 ***
## SEMANA12 0.436869 0.029664 14.727 < 2e-16 ***
## SEMANA13 0.435725 0.028991 15.030 < 2e-16 ***
## SEMANA14 0.483469 0.029074 16.629 < 2e-16 ***
## SEMANA15 0.475855 0.029142 16.329 < 2e-16 ***
## SEMANA16 0.407165 0.029494 13.805 < 2e-16 ***
## SEMANA17 0.490445 0.027860 17.604 < 2e-16 ***
## SEMANA18 0.481088 0.027977 17.196 < 2e-16 ***
## SEMANA19 0.461818 0.028578 16.160 < 2e-16 ***
## SEMANA20 0.463122 0.028445 16.281 < 2e-16 ***
## SEMANA21 0.451666 0.028626 15.778 < 2e-16 ***
## SEMANA22 0.410022 0.029146 14.068 < 2e-16 ***
## SEMANA23 0.468283 0.028828 16.244 < 2e-16 ***
## SEMANA24 0.440203 0.028687 15.345 < 2e-16 ***
## SEMANA25 0.383990 0.029281 13.114 < 2e-16 ***
## SEMANA26 0.324261 0.029621 10.947 < 2e-16 ***
## SEMANA27 0.407123 0.030244 13.461 < 2e-16 ***
## SEMANA28 0.431345 0.028610 15.077 < 2e-16 ***
## SEMANA29 0.468959 0.028163 16.651 < 2e-16 ***
## SEMANA30 0.467733 0.028543 16.387 < 2e-16 ***
## SEMANA31 0.491210 0.031121 15.784 < 2e-16 ***
## SEMANA32 0.463948 0.030715 15.105 < 2e-16 ***
## SEMANA33 0.474215 0.028514 16.631 < 2e-16 ***
## SEMANA34 0.422856 0.029519 14.325 < 2e-16 ***
## SEMANA35 0.466915 0.028425 16.426 < 2e-16 ***
## SEMANA36 0.440357 0.028563 15.417 < 2e-16 ***
## SEMANA37 0.498163 0.028267 17.623 < 2e-16 ***
## SEMANA38 0.498163 0.028267 17.623 < 2e-16 ***
## SEMANA39 0.425292 0.028642 14.848 < 2e-16 ***
## SEMANA40 0.492786 0.028294 17.416 < 2e-16 ***
## SEMANA41 0.371983 0.028931 12.858 < 2e-16 ***
## SEMANA42 0.438123 0.030078 14.566 < 2e-16 ***
## SEMANA43 0.442149 0.028553 15.485 < 2e-16 ***
## SEMANA44 0.433154 0.028601 15.145 < 2e-16 ***
## SEMANA45 0.384904 0.030365 12.676 < 2e-16 ***
## SEMANA46 0.444347 0.028962 15.342 < 2e-16 ***
## SEMANA47 0.430144 0.029021 14.822 < 2e-16 ***
## SEMANA48 0.427414 0.028631 14.928 < 2e-16 ***
## SEMANA49 0.452770 0.027968 16.189 < 2e-16 ***
## SEMANA50 0.458286 0.028171 16.268 < 2e-16 ***
## SEMANA51 0.502208 0.027800 18.065 < 2e-16 ***
## SEMANA52 0.303926 0.029276 10.381 < 2e-16 ***
## SEMANA53 -0.618838 0.047479 -13.034 < 2e-16 ***
## Feria_Flores_Semana 0.114436 0.025566 4.476 7.60e-06 ***
## Semana_Santa_Semana -0.249251 0.034801 -7.162 7.94e-13 ***
## Feriados_Lunes -0.059120 0.008736 -6.767 1.31e-11 ***
## Feriados_Otros -0.058858 0.012967 -4.539 5.65e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3138.71 on 209 degrees of freedom
## Residual deviance: 636.17 on 152 degrees of freedom
## AIC: 2540.8
##
## Number of Fisher Scoring iterations: 4
glm_fit_s_m
##
## Call: glm(formula = ACCIDENTES_GRAVES ~ Ano_Base + SEMANA + Feria_Flores_Semana +
## Semana_Santa_Semana + Feriados_Lunes + Feriados_Otros, family = "poisson",
## data = Train_S_Dataset)
##
## Coefficients:
## (Intercept) Ano_Base SEMANA02
## 5.788318 0.009836 0.099524
## SEMANA03 SEMANA04 SEMANA05
## 0.286121 0.245220 0.324299
## SEMANA06 SEMANA07 SEMANA08
## 0.381579 0.338522 0.364261
## SEMANA09 SEMANA10 SEMANA11
## 0.340150 0.402177 0.397580
## SEMANA12 SEMANA13 SEMANA14
## 0.357368 0.359821 0.394598
## SEMANA15 SEMANA16 SEMANA17
## 0.357023 0.341206 0.373905
## SEMANA18 SEMANA19 SEMANA20
## 0.353534 0.359211 0.357888
## SEMANA21 SEMANA22 SEMANA23
## 0.359651 0.325816 0.392989
## SEMANA24 SEMANA25 SEMANA26
## 0.351563 0.296681 0.219235
## SEMANA27 SEMANA28 SEMANA29
## 0.323757 0.347714 0.368714
## SEMANA30 SEMANA31 SEMANA32
## 0.367675 0.392465 0.346583
## SEMANA33 SEMANA34 SEMANA35
## 0.385200 0.354601 0.423857
## SEMANA36 SEMANA37 SEMANA38
## 0.343399 0.413832 0.400137
## SEMANA39 SEMANA40 SEMANA41
## 0.358420 0.382100 0.251165
## SEMANA42 SEMANA43 SEMANA44
## 0.349806 0.343939 0.312661
## SEMANA45 SEMANA46 SEMANA47
## 0.294651 0.309409 0.309212
## SEMANA48 SEMANA49 SEMANA50
## 0.292390 0.288662 0.299181
## SEMANA51 SEMANA52 SEMANA53
## 0.347609 0.240504 -0.703891
## Feria_Flores_Semana Semana_Santa_Semana Feriados_Lunes
## 0.108498 -0.256489 -0.039936
## Feriados_Otros
## -0.041126
##
## Degrees of Freedom: 209 Total (i.e. Null); 152 Residual
## Null Deviance: 1654
## Residual Deviance: 458.8 AIC: 2241
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_glm_s_m<-predict(glm_fit_s_m,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")],type="response")
mse_tr_glm_s_m<-mean((Train_S_Dataset$ACCIDENTES_GRAVES-y_tr_pred_glm_s_m)^2) # calcula el mse de entrenamiento
RMSE_tr_glm_s_m = sqrt(mse_tr_glm_s_m)
mse_tr_glm_s_m
## [1] 897.2075
RMSE_tr_glm_s_m
## [1] 29.95342
Calculo MSE y RMSE para los datos de validación
y_test_pred_glm_s_m<-predict(glm_fit_s_m,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")],type="response")
mse_test_glm_s_m<-mean((Train_S_Dataset$ACCIDENTES_GRAVES-y_test_pred_glm_s_m)^2) # calcula el mse de entrenamiento
## Warning in Train_S_Dataset$ACCIDENTES_GRAVES - y_test_pred_glm_s_m:
## longitud de objeto mayor no es múltiplo de la longitud de uno menor
RMSE_test_glm_s_m = sqrt(mse_test_glm_s_m)
mse_test_glm_s_m
## [1] 2280.648
RMSE_test_glm_s_m
## [1] 47.75613
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_glm_s_m,
name='Modelo glm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_glm_s_m,
name='Modelo glm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
glm_fit_s_sd<-glm(ACCIDENTES_LEVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset, family = "poisson")
summary(glm_fit_s_sd)
##
## Call:
## glm(formula = ACCIDENTES_LEVES ~ Ano_Base + SEMANA + Feria_Flores_Semana +
## Semana_Santa_Semana + Feriados_Lunes + Feriados_Otros, family = "poisson",
## data = Train_S_Dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -9.4088 -0.8213 0.0148 0.9198 6.4250
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.327381 0.036599 145.562 < 2e-16 ***
## Ano_Base 0.034689 0.007316 4.741 2.12e-06 ***
## SEMANA02 0.305737 0.048405 6.316 2.68e-10 ***
## SEMANA03 0.448028 0.045902 9.761 < 2e-16 ***
## SEMANA04 0.515198 0.045126 11.417 < 2e-16 ***
## SEMANA05 0.508763 0.045177 11.262 < 2e-16 ***
## SEMANA06 0.529353 0.045015 11.760 < 2e-16 ***
## SEMANA07 0.630206 0.044260 14.239 < 2e-16 ***
## SEMANA08 0.541229 0.044922 12.048 < 2e-16 ***
## SEMANA09 0.557758 0.044796 12.451 < 2e-16 ***
## SEMANA10 0.619988 0.044333 13.985 < 2e-16 ***
## SEMANA11 0.572673 0.044683 12.816 < 2e-16 ***
## SEMANA12 0.552064 0.046546 11.861 < 2e-16 ***
## SEMANA13 0.546106 0.045546 11.990 < 2e-16 ***
## SEMANA14 0.611089 0.045487 13.434 < 2e-16 ***
## SEMANA15 0.640681 0.045323 14.136 < 2e-16 ***
## SEMANA16 0.504788 0.046348 10.891 < 2e-16 ***
## SEMANA17 0.652152 0.043531 14.981 < 2e-16 ***
## SEMANA18 0.656486 0.043692 15.025 < 2e-16 ***
## SEMANA19 0.606659 0.044655 13.586 < 2e-16 ***
## SEMANA20 0.610962 0.044399 13.761 < 2e-16 ***
## SEMANA21 0.583114 0.044819 13.010 < 2e-16 ***
## SEMANA22 0.531601 0.045705 11.631 < 2e-16 ***
## SEMANA23 0.577994 0.045319 12.754 < 2e-16 ***
## SEMANA24 0.567365 0.044939 12.625 < 2e-16 ***
## SEMANA25 0.509450 0.045861 11.108 < 2e-16 ***
## SEMANA26 0.472169 0.046170 10.227 < 2e-16 ***
## SEMANA27 0.527185 0.047417 11.118 < 2e-16 ***
## SEMANA28 0.552278 0.044837 12.317 < 2e-16 ***
## SEMANA29 0.610870 0.044224 13.813 < 2e-16 ***
## SEMANA30 0.609263 0.044623 13.654 < 2e-16 ***
## SEMANA31 0.631011 0.048329 13.057 < 2e-16 ***
## SEMANA32 0.626940 0.047592 13.173 < 2e-16 ***
## SEMANA33 0.602000 0.044690 13.471 < 2e-16 ***
## SEMANA34 0.522700 0.046451 11.253 < 2e-16 ***
## SEMANA35 0.533560 0.044982 11.862 < 2e-16 ***
## SEMANA36 0.578043 0.044642 12.948 < 2e-16 ***
## SEMANA37 0.619988 0.044333 13.985 < 2e-16 ***
## SEMANA38 0.637170 0.044210 14.412 < 2e-16 ***
## SEMANA39 0.524421 0.045053 11.640 < 2e-16 ***
## SEMANA40 0.647214 0.044138 14.663 < 2e-16 ***
## SEMANA41 0.538447 0.044944 11.980 < 2e-16 ***
## SEMANA42 0.564755 0.047119 11.986 < 2e-16 ***
## SEMANA43 0.581384 0.044617 13.030 < 2e-16 ***
## SEMANA44 0.599235 0.044485 13.471 < 2e-16 ***
## SEMANA45 0.514082 0.047524 10.817 < 2e-16 ***
## SEMANA46 0.628373 0.044961 13.976 < 2e-16 ***
## SEMANA47 0.597291 0.045150 13.229 < 2e-16 ***
## SEMANA48 0.610314 0.044404 13.745 < 2e-16 ***
## SEMANA49 0.669428 0.043325 15.451 < 2e-16 ***
## SEMANA50 0.669221 0.043646 15.333 < 2e-16 ***
## SEMANA51 0.707653 0.043136 16.405 < 2e-16 ***
## SEMANA52 0.396460 0.046321 8.559 < 2e-16 ***
## SEMANA53 -0.496270 0.072955 -6.802 1.03e-11 ***
## Feria_Flores_Semana 0.122084 0.038117 3.203 0.00136 **
## Semana_Santa_Semana -0.241999 0.052486 -4.611 4.01e-06 ***
## Feriados_Lunes -0.083630 0.013185 -6.343 2.25e-10 ***
## Feriados_Otros -0.081229 0.019322 -4.204 2.62e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2008.38 on 209 degrees of freedom
## Residual deviance: 513.12 on 152 degrees of freedom
## AIC: 2245.6
##
## Number of Fisher Scoring iterations: 4
glm_fit_s_sd
##
## Call: glm(formula = ACCIDENTES_LEVES ~ Ano_Base + SEMANA + Feria_Flores_Semana +
## Semana_Santa_Semana + Feriados_Lunes + Feriados_Otros, family = "poisson",
## data = Train_S_Dataset)
##
## Coefficients:
## (Intercept) Ano_Base SEMANA02
## 5.32738 0.03469 0.30574
## SEMANA03 SEMANA04 SEMANA05
## 0.44803 0.51520 0.50876
## SEMANA06 SEMANA07 SEMANA08
## 0.52935 0.63021 0.54123
## SEMANA09 SEMANA10 SEMANA11
## 0.55776 0.61999 0.57267
## SEMANA12 SEMANA13 SEMANA14
## 0.55206 0.54611 0.61109
## SEMANA15 SEMANA16 SEMANA17
## 0.64068 0.50479 0.65215
## SEMANA18 SEMANA19 SEMANA20
## 0.65649 0.60666 0.61096
## SEMANA21 SEMANA22 SEMANA23
## 0.58311 0.53160 0.57799
## SEMANA24 SEMANA25 SEMANA26
## 0.56737 0.50945 0.47217
## SEMANA27 SEMANA28 SEMANA29
## 0.52718 0.55228 0.61087
## SEMANA30 SEMANA31 SEMANA32
## 0.60926 0.63101 0.62694
## SEMANA33 SEMANA34 SEMANA35
## 0.60200 0.52270 0.53356
## SEMANA36 SEMANA37 SEMANA38
## 0.57804 0.61999 0.63717
## SEMANA39 SEMANA40 SEMANA41
## 0.52442 0.64721 0.53845
## SEMANA42 SEMANA43 SEMANA44
## 0.56475 0.58138 0.59924
## SEMANA45 SEMANA46 SEMANA47
## 0.51408 0.62837 0.59729
## SEMANA48 SEMANA49 SEMANA50
## 0.61031 0.66943 0.66922
## SEMANA51 SEMANA52 SEMANA53
## 0.70765 0.39646 -0.49627
## Feria_Flores_Semana Semana_Santa_Semana Feriados_Lunes
## 0.12208 -0.24200 -0.08363
## Feriados_Otros
## -0.08123
##
## Degrees of Freedom: 209 Total (i.e. Null); 152 Residual
## Null Deviance: 2008
## Residual Deviance: 513.1 AIC: 2246
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_glm_s_sd<-predict(glm_fit_s_sd,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")],type="response")
mse_tr_glm_s_sd<-mean((Train_S_Dataset$ACCIDENTES_LEVES-y_tr_pred_glm_s_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_glm_s_sd = sqrt(mse_tr_glm_s_sd)
mse_tr_glm_s_sd
## [1] 669.6687
RMSE_tr_glm_s_sd
## [1] 25.87796
Calculo MSE y RMSE para los datos de validación
y_test_pred_glm_s_sd<-predict(glm_fit_s_sd,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")],type="response")
mse_test_glm_s_sd<-mean((Train_S_Dataset$ACCIDENTES_LEVES-y_test_pred_glm_s_sd)^2) # calcula el mse de entrenamiento
## Warning in Train_S_Dataset$ACCIDENTES_LEVES - y_test_pred_glm_s_sd:
## longitud de objeto mayor no es múltiplo de la longitud de uno menor
RMSE_test_glm_s_sd = sqrt(mse_test_glm_s_sd)
mse_test_glm_s_sd
## [1] 2241.203
RMSE_test_glm_s_sd
## [1] 47.34135
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_glm_s_sd,
name='Modelo glm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_glm_s_sd,
name='Modelo glm',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total Accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
#### REsumen Modelos Regresión lineal generalizado para los diferentes tipos de accidente
Tipo_de_accidentes= c("Total Accidentes","Accidentes Graves","Accidentes Leves")
RMSE_Train_glm = round(c(RMSE_tr_glm_s,RMSE_tr_glm_s_m,RMSE_tr_glm_s_sd), 3)
RMSE_Test_glm = round(c(RMSE_test_glm_s,RMSE_test_glm_s_m,RMSE_test_glm_s_sd),3)
Tabla_glm = data.frame (cbind(Tipo_de_accidentes,RMSE_Train_glm,RMSE_Test_glm))
Tabla_glm
## Tipo_de_accidentes RMSE_Train_glm RMSE_Test_glm
## 1 Total Accidentes 44.056 85.567
## 2 Accidentes Graves 29.953 47.756
## 3 Accidentes Leves 25.878 47.341
trcntrl = trainControl(method="cv", number=10)
caret_tree_fit_s = caret::train(TOTAL_ACCIDENTES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros,data=Train_S_Dataset,
method = "rpart", trControl = trcntrl,
parms = list(split = "gini"),
preProcess=c("center", "scale"),
tuneLength = 10)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut
## = 10, : These variables have zero variances: SEMANA53
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
caret_tree_fit_s
## CART
##
## 210 samples
## 6 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 190, 190, 188, 189, 190, 189, ...
## Resampling results across tuning parameters:
##
## cp RMSE Rsquared MAE
## 0.00000000 90.03839 0.22597915 60.02933
## 0.01029619 89.86132 0.23564610 60.36643
## 0.02059239 89.40535 0.23917583 60.42981
## 0.03088858 89.40535 0.23917583 60.42981
## 0.04118477 89.40535 0.23917583 60.42981
## 0.05148097 91.10976 0.21309430 61.82791
## 0.06177716 92.78224 0.18066210 62.41575
## 0.07207335 94.84337 0.14490896 64.49534
## 0.08236955 98.03159 0.09107280 68.27761
## 0.09266574 99.56088 0.03930042 70.17119
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.04118477.
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_tree_s<-predict(caret_tree_fit_s,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_tree_s<-mean((Train_S_Dataset$TOTAL_ACCIDENTES-y_tr_pred_tree_s)^2) # calcula el mse de entrenamiento
RMSE_tr_tree_s = sqrt(mse_tr_tree_s)
mse_tr_tree_s
## [1] 8571.789
RMSE_tr_tree_s
## [1] 92.58396
Calculo MSE y RMSE para los datos de validación
y_test_pred_tree_s<-predict(caret_tree_fit_s,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_tree_s<-mean((Test_S_Dataset$TOTAL_ACCIDENTES-y_test_pred_tree_s)^2) # calcula el mse de entrenamiento
RMSE_test_tree_s = sqrt(mse_test_tree_s)
mse_test_tree_s
## [1] 3909.887
RMSE_test_tree_s
## [1] 62.52909
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_tree_s,
name='Modelo tree',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_tree_s,
name='Modelo tree',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
trcntrl = trainControl(method="cv", number=10)
caret_tree_fit_s_m = caret::train(ACCIDENTES_GRAVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros,data=Train_S_Dataset,
method = "rpart", trControl = trcntrl,
parms = list(split = "gini"),
preProcess=c("center", "scale"),
tuneLength = 10)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
caret_tree_fit_s_m
## CART
##
## 210 samples
## 6 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 189, 190, 188, 190, 189, 190, ...
## Resampling results across tuning parameters:
##
## cp RMSE Rsquared MAE
## 0.000000000 52.11449 0.10711890 36.62441
## 0.009507413 51.37813 0.11978591 36.27833
## 0.019014826 50.59740 0.14649515 35.84655
## 0.028522239 50.59740 0.14649515 35.84655
## 0.038029652 51.74154 0.09941232 36.80310
## 0.047537065 52.32083 0.09155580 37.29817
## 0.057044478 53.15921 0.06724254 37.87887
## 0.066551891 52.92938 0.07832094 37.79743
## 0.076059304 53.88846 0.03710721 38.80926
## 0.085566717 53.88846 0.03710721 38.80926
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.02852224.
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_tree_s_m<-predict(caret_tree_fit_s_m,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_tree_s_m<-mean((Train_S_Dataset$ACCIDENTES_GRAVES-y_tr_pred_tree_s_m)^2) # calcula el mse de entrenamiento
RMSE_tr_tree_s_m = sqrt(mse_tr_tree_s_m)
mse_tr_tree_s_m
## [1] 2702.14
RMSE_tr_tree_s_m
## [1] 51.98211
Calculo MSE y RMSE para los datos de validación
y_test_pred_tree_s_m<-predict(caret_tree_fit_s_m,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_tree_s_m<-mean((Test_S_Dataset$ACCIDENTES_GRAVES-y_test_pred_tree_s_m)^2) # calcula el mse de entrenamiento
RMSE_test_tree_s_m = sqrt(mse_test_tree_s_m)
mse_test_tree_s_m
## [1] 2493.983
RMSE_test_tree_s_m
## [1] 49.93979
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_tree_s_m,
name='Modelo tree',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_tree_s_m,
name='Modelo tree',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
trcntrl = trainControl(method="cv", number=10)
caret_tree_fit_s_sd = caret::train(ACCIDENTES_LEVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros,data=Train_S_Dataset,
method = "rpart", trControl = trcntrl,
parms = list(split = "gini"),
preProcess=c("center", "scale"),
tuneLength = 10)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
caret_tree_fit_s_sd
## CART
##
## 210 samples
## 6 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 188, 190, 190, 188, 188, 190, ...
## Resampling results across tuning parameters:
##
## cp RMSE Rsquared MAE
## 0.000000000 48.46375 0.15751144 34.53645
## 0.008958719 48.66304 0.14614145 34.69539
## 0.017917439 48.38173 0.15074617 34.34427
## 0.026876158 48.38173 0.15074617 34.34427
## 0.035834877 48.38173 0.15074617 34.34427
## 0.044793597 49.01975 0.12905765 34.52015
## 0.053752316 49.01975 0.12905765 34.52015
## 0.062711035 49.46213 0.12006180 34.77398
## 0.071669755 51.71397 0.03995370 36.56575
## 0.080628474 52.35869 0.02865765 37.31018
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.03583488.
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_tree_s_sd<-predict(caret_tree_fit_s_sd,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_tree_s_sd<-mean((Train_S_Dataset$ACCIDENTES_LEVES-y_tr_pred_tree_s_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_tree_s_sd = sqrt(mse_tr_tree_s_sd)
mse_tr_tree_s_sd
## [1] 2471.872
RMSE_tr_tree_s_sd
## [1] 49.71793
Calculo MSE y RMSE para los datos de validación
y_test_pred_tree_s_sd<-predict(caret_tree_fit_s_sd,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_tree_s_sd<-mean((Test_S_Dataset$ACCIDENTES_LEVES-y_test_pred_tree_s_sd)^2) # calcula el mse de entrenamiento
RMSE_test_tree_s_sd = sqrt(mse_test_tree_s_sd)
mse_test_tree_s_sd
## [1] 1766.186
RMSE_test_tree_s_sd
## [1] 42.02602
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_tree_s_sd,
name='Modelo tree',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_tree_s_sd,
name='Modelo tree',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
Tipo_de_accidentes= c("Total Accidentes","Accidentes Graves","Accidentes Leves")
RMSE_Train_tree = round(c(RMSE_tr_tree_s,RMSE_tr_tree_s_m,RMSE_tr_tree_s_sd), 3)
RMSE_Test_tree = round(c(RMSE_test_tree_s,RMSE_test_tree_s_m,RMSE_test_tree_s_sd),3)
Tabla_tree = data.frame (cbind(Tipo_de_accidentes,RMSE_Train_tree,RMSE_Test_tree))
Tabla_tree
## Tipo_de_accidentes RMSE_Train_tree RMSE_Test_tree
## 1 Total Accidentes 92.584 62.529
## 2 Accidentes Graves 51.982 49.94
## 3 Accidentes Leves 49.718 42.026
trcntrl = trainControl(method="cv", number=10)
caret_rf_fit_s = caret::train(TOTAL_ACCIDENTES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
method = "rf", trControl = trcntrl,
prox=TRUE,allowParallel=TRUE)
summary(caret_rf_fit_s)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 210 -none- numeric
## mse 500 -none- numeric
## rsq 500 -none- numeric
## oob.times 210 -none- numeric
## importance 57 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 44100 -none- numeric
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 11 -none- list
## coefs 0 -none- NULL
## y 210 -none- numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 57 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 2 -none- list
caret_rf_fit_s
## Random Forest
##
## 210 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 188, 187, 189, 190, 190, 190, ...
## Resampling results across tuning parameters:
##
## mtry RMSE Rsquared MAE
## 2 78.31259 0.5147516 55.17002
## 29 80.39827 0.3457506 60.11523
## 57 82.26926 0.3282961 62.07319
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 2.
plot(caret_rf_fit_s)
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_rf_s<-predict(caret_rf_fit_s,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_rf_s<-mean((Train_S_Dataset$TOTAL_ACCIDENTES-y_tr_pred_rf_s)^2) # calcula el mse de entrenamiento
RMSE_tr_rf_s = sqrt(mse_tr_rf_s)
mse_tr_rf_s
## [1] 5745.099
RMSE_tr_rf_s
## [1] 75.79643
Calculo MSE y RMSE para los datos de validación
y_test_pred_rf_s<-predict(caret_rf_fit_s,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_rf_s<-mean((Test_S_Dataset$TOTAL_ACCIDENTES-y_test_pred_rf_s)^2) # calcula el mse de entrenamiento
RMSE_test_rf_s = sqrt(mse_test_rf_s)
mse_test_rf_s
## [1] 3400.533
RMSE_test_rf_s
## [1] 58.31409
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_rf_s,
name='Modelo rf',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~TOTAL_ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_rf_s,
name='Modelo rf',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.4))
trcntrl = trainControl(method="cv", number=10)
caret_rf_fit_s_m = caret::train(ACCIDENTES_GRAVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
method = "rf", trControl = trcntrl,
prox=TRUE,allowParallel=TRUE)
summary(caret_rf_fit_s_m)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 210 -none- numeric
## mse 500 -none- numeric
## rsq 500 -none- numeric
## oob.times 210 -none- numeric
## importance 57 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 44100 -none- numeric
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 11 -none- list
## coefs 0 -none- NULL
## y 210 -none- numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 57 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 2 -none- list
caret_rf_fit_s_m
## Random Forest
##
## 210 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 188, 189, 189, 188, 190, 189, ...
## Resampling results across tuning parameters:
##
## mtry RMSE Rsquared MAE
## 2 47.53115 0.3604442 34.49706
## 29 46.16836 0.2951952 35.33219
## 57 46.97492 0.2832970 36.04291
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 29.
plot(caret_rf_fit_s_m)
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_rf_s_m<-predict(caret_rf_fit_s_m,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_rf_s_m<-mean((Train_S_Dataset$ACCIDENTES_GRAVES-y_tr_pred_rf_s_m)^2) # calcula el mse de entrenamiento
RMSE_tr_rf_s_m = sqrt(mse_tr_rf_s_m)
mse_tr_rf_s_m
## [1] 849.2143
RMSE_tr_rf_s_m
## [1] 29.14128
Calculo MSE y RMSE para los datos de validación
y_test_pred_rf_s_m<-predict(caret_rf_fit_s_m,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_rf_s_m<-mean((Test_S_Dataset$ACCIDENTES_GRAVES-y_test_pred_rf_s_m)^2) # calcula el mse de entrenamiento
RMSE_test_rf_s_m = sqrt(mse_test_rf_s_m)
mse_test_rf_s_m
## [1] 2512.328
RMSE_test_rf_s_m
## [1] 50.12313
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_rf_s_m,
name='Modelo rf',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_GRAVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_rf_s_m,
name='Modelo rf',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes graves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes graves"),
legend = list(x = 1, y = 0.4))
trcntrl = trainControl(method="cv", number=10)
caret_rf_fit_s_sd = caret::train(ACCIDENTES_LEVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
method = "rf", trControl = trcntrl,
prox=TRUE,allowParallel=TRUE)
summary(caret_rf_fit_s_sd)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 210 -none- numeric
## mse 500 -none- numeric
## rsq 500 -none- numeric
## oob.times 210 -none- numeric
## importance 57 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 44100 -none- numeric
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 11 -none- list
## coefs 0 -none- NULL
## y 210 -none- numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 57 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 2 -none- list
caret_rf_fit_s_sd
## Random Forest
##
## 210 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 190, 189, 189, 188, 188, 190, ...
## Resampling results across tuning parameters:
##
## mtry RMSE Rsquared MAE
## 2 44.56759 0.4451170 31.98620
## 29 47.05826 0.2588825 35.01703
## 57 48.14412 0.2501655 35.95330
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 2.
plot(caret_rf_fit_s_sd)
Calculo MSE y RMSE para los datos de entrenamiento
y_tr_pred_rf_s_sd<-predict(caret_rf_fit_s_sd,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_rf_s_sd<-mean((Train_S_Dataset$ACCIDENTES_LEVES-y_tr_pred_rf_s_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_rf_s_sd = sqrt(mse_tr_rf_s_sd)
mse_tr_rf_s_sd
## [1] 1764.784
RMSE_tr_rf_s_sd
## [1] 42.00934
Calculo MSE y RMSE para los datos de validación
y_test_pred_rf_s_sd<-predict(caret_rf_fit_s_sd,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_rf_s_sd<-mean((Test_S_Dataset$ACCIDENTES_LEVES-y_test_pred_rf_s_sd)^2) # calcula el mse de entrenamiento
RMSE_test_rf_s_sd = sqrt(mse_test_rf_s_sd)
mse_test_rf_s_sd
## [1] 1392.295
RMSE_test_rf_s_sd
## [1] 37.31347
Predicción en la muestra
plot_ly (data=Train_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_tr_pred_rf_s_sd,
name='Modelo rf',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
Gráfica serie 2018
plot_ly (data=Test_S_Dataset,
x = ~Ano_Sem,
y = ~ACCIDENTES_LEVES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~y_test_pred_rf_s_sd,
name='Modelo rf',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
layout(title='Total accidentes leves',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes leves"),
legend = list(x = 1, y = 0.4))
Tipo_de_accidentes= c("Total Accidentes","Total Graves","Total Leves")
RMSE_Train_rf = round(c(RMSE_tr_rf_s,RMSE_tr_rf_s_m,RMSE_tr_rf_s_sd), 3)
RMSE_Test_rf = round(c(RMSE_test_rf_s,RMSE_test_rf_s_m,RMSE_test_rf_s_sd),3)
Tabla_rf = data.frame (cbind(Tipo_de_accidentes,RMSE_Train_rf,RMSE_Test_rf))
Tabla_rf
## Tipo_de_accidentes RMSE_Train_rf RMSE_Test_rf
## 1 Total Accidentes 75.796 58.314
## 2 Total Graves 29.141 50.123
## 3 Total Leves 42.009 37.313
Comparación en el entrenamiento
comparacion_tr<-data.frame(Ano_Sem=Total_Dataset_Freq_S$Ano_Sem[Total_Dataset_Freq_S$Ano_Sem<="2017_52"],
ACCIDENTES=Total_Dataset_Freq_S$TOTAL_ACCIDENTES[Total_Dataset_Freq_S$Ano_Sem<="2017_52"],
lm= y_tr_pred_lm_s,
knn= y_tr_pred_knn_s,
glm=y_tr_pred_glm_s ,
arbol=y_tr_pred_tree_s,
rf=y_tr_pred_rf_s)
plot_ly (data=comparacion_tr,
x = ~Ano_Sem,
y = ~ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~lm,
name='lm',
line=list(width=1,color= "blue"))%>%
add_trace(y= ~knn,
name='knn',
line=list(width=1,color="red"))%>%
add_trace(y= ~glm,
name='Modelo Poisson',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
add_trace(y= ~arbol,
name='Árbol',
line=list(width=1,color="green"))%>%
add_trace(y= ~rf,
name='Bosque',
line=list(width=1,color='rgb(255, 51, 153)'))%>%
layout(title='Total Accidentes (Entrenamiento)',
xaxis=list(title="Fecha"),
yaxis=list(title="Unidades"),
legend = list(x = 1, y = 0.9))
Comparación en la validación
comparacion_vl<-data.frame(Ano_Sem=Test_S_Dataset$Ano_Sem,
ACCIDENTES=Test_S_Dataset$TOTAL_ACCIDENTES,
lm= y_test_pred_lm_s,
knn= y_test_pred_knn_s,
glm=y_test_pred_glm_s ,
arbol=y_test_pred_tree_s,
rf=y_test_pred_rf_s)
plot_ly (data=comparacion_vl,
x = ~Ano_Sem,
y = ~ACCIDENTES,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~lm,
name='lm',
line=list(width=1,color= "blue"))%>%
add_trace(y= ~knn,
name='knn',
line=list(width=1,color="red"))%>%
add_trace(y= ~glm,
name='Modelo Poisson',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
add_trace(y= ~arbol,
name='Árbol',
line=list(width=1,color="green"))%>%
add_trace(y= ~rf,
name='Bosque',
line=list(width=1,color='rgb(255, 51, 153)'))%>%
layout(title='Total Accidentes (Validación)',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.9))
Entrenamiento<-round(c(RMSE_tr_lm_s,RMSE_tr_knn_s,RMSE_tr_glm_s,RMSE_tr_tree_s,RMSE_tr_rf_s),3)
Validacion<-round(c(RMSE_test_lm_s,RMSE_test_knn_s,RMSE_test_glm_s,RMSE_test_tree_s,RMSE_test_rf_s),3)
nombres<-c("lm","knn","glm","árbol","bosque")
ResultadosRMSE<-data.frame(Entrenamiento=Entrenamiento,Validacion=Validacion)
rownames(ResultadosRMSE)<-nombres
Cálculo de la variación
ResultadosRMSE$Por_variacion<-((ResultadosRMSE$Validacion-ResultadosRMSE$Entrenamiento)/ResultadosRMSE$Entrenamiento)*100
ResultadosRMSE
## Entrenamiento Validacion Por_variacion
## lm 44.036 63.668 44.58171
## knn 87.688 53.026 -39.52878
## glm 44.056 85.567 94.22326
## árbol 92.584 62.529 -32.46241
## bosque 75.796 58.314 -23.06454
Comparación en el entrenamiento
comparacion_tr<-data.frame(Ano_Sem=Total_Dataset_Freq_S$Ano_Sem[Total_Dataset_Freq_S$Ano_Sem<="2017_52"],
ACCIDENTESG=Total_Dataset_Freq_S$ACCIDENTES_GRAVES[Total_Dataset_Freq_S$Ano_Sem<="2017_52"],
lm= y_tr_pred_lm_s_m,
knn= y_tr_pred_knn_s_m,
glm=y_tr_pred_glm_s_m,
arbol=y_tr_pred_tree_s_m,
rf=y_tr_pred_rf_s_m)
plot_ly (data=comparacion_tr,
x = ~Ano_Sem,
y = ~ACCIDENTESG,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~lm,
name='lm-',
line=list(width=1,color= "blue"))%>%
add_trace(y= ~knn,
name='knn-',
line=list(width=1,color="red"))%>%
add_trace(y= ~glm,
name='Modelo Poisson',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
add_trace(y= ~arbol,
name='Árbol',
line=list(width=1,color="green"))%>%
add_trace(y= ~rf,
name='Bosque',
line=list(width=1,color='black'))%>%
layout(title='Accidentes graves (Entrenamiento)',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.9))
Comparación en la validación
comparacion_vl<-data.frame(Ano_Sem=Test_S_Dataset$Ano_Sem,
ACCIDENTESG=Test_S_Dataset$ACCIDENTES_GRAVES,
lm= y_test_pred_lm_s_m,
knn= y_test_pred_knn_s_m,
glm=y_test_pred_glm_s_m,
arbol=y_test_pred_tree_s_m,
rf=y_test_pred_rf_s_m)
plot_ly (data=comparacion_vl,
x = ~Ano_Sem,
y = ~ACCIDENTESG,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~lm,
name='lm',
line=list(width=1,color= "blue"))%>%
add_trace(y= ~knn,
name='knn',
line=list(width=1,color="red"))%>%
add_trace(y= ~glm,
name='Modelo Poisson',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
add_trace(y= ~arbol,
name='Árbol',
line=list(width=1,color="green"))%>%
add_trace(y= ~rf,
name='Bosque',
line=list(width=1,color='rgb(255, 51, 153)'))%>%
layout(title='Accidentes graves (Validación)',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.9))
Entrenamiento<-round(c(RMSE_tr_lm_s_m,RMSE_tr_knn_s_m,RMSE_tr_glm_s_m,RMSE_tr_tree_s_m,RMSE_tr_rf_s_m),3)
Validacion<-round(c(RMSE_test_lm_s_m,RMSE_test_knn_s_m,RMSE_test_glm_s_m,RMSE_test_tree_s_m,RMSE_test_rf_s_m),3)
nombres<-c("lm","knn","glm","árbol","bosque")
ResultadosRMSE<-data.frame(Entrenamiento=Entrenamiento,Validacion=Validacion)
rownames(ResultadosRMSE)<-nombres
Cálculo de la variación
ResultadosRMSE$Variacion<-((ResultadosRMSE$Validacion-ResultadosRMSE$Entrenamiento)/ResultadosRMSE$Entrenamiento)*100
ResultadosRMSE
## Entrenamiento Validacion Variacion
## lm 30.000 55.998 86.660000
## knn 50.479 44.620 -11.606807
## glm 29.953 47.756 59.436450
## árbol 51.982 49.940 -3.928283
## bosque 29.141 50.123 72.001647
Comparación en el entrenamiento
comparacion_tr<-data.frame(Ano_Sem=Total_Dataset_Freq_S$Ano_Sem[Total_Dataset_Freq_S$Ano_Sem<="2017_52"],
ACCIDENTESL=Total_Dataset_Freq_S$ACCIDENTES_LEVES[Total_Dataset_Freq_S$Ano_Sem<="2017_52"],
lm= y_tr_pred_lm_s_sd,
knn= y_tr_pred_knn_s_sd,
glm=y_tr_pred_glm_s_sd,
arbol=y_tr_pred_tree_s_sd,
rf=y_tr_pred_rf_s_sd)
plot_ly (data=comparacion_tr,
x = ~Ano_Sem,
y = ~ACCIDENTESL,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~lm,
name='lm',
line=list(width=1,color= "blue"))%>%
add_trace(y= ~knn,
name='knn',
line=list(width=1,color="red"))%>%
add_trace(y= ~glm,
name='Modelo Poisson',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
add_trace(y= ~arbol,
name='Árbo',
line=list(width=1,color="green"))%>%
add_trace(y= ~rf,
name='Bosque',
line=list(width=1,color='rgb(255, 51, 153)'))%>%
layout(title='Accidentes leves (Entrenamiento)',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.9))
Comparación en la validación
comparacion_vl<-data.frame(Ano_Sem=Test_S_Dataset$Ano_Sem,
ACCIDENTESL=Test_S_Dataset$ACCIDENTES_GRAVES,
lm= y_test_pred_lm_s_sd,
knn= y_test_pred_knn_s_sd,
glm=y_test_pred_glm_s_sd,
arbol=y_test_pred_tree_s_sd,
rf=y_test_pred_rf_s_sd)
plot_ly (data=comparacion_vl,
x = ~Ano_Sem,
y = ~ACCIDENTESL,
type = "scatter" ,mode = "lines",
name='Real',
line=list(width=1,color='rgb(205, 12, 24)'))%>%
add_trace(y= ~lm,
name='lm',
line=list(width=1,color= "blue"))%>%
add_trace(y= ~knn,
name='knn',
line=list(width=1,color="red"))%>%
add_trace(y= ~glm,
name='Modelo Poisson',
line=list(width=1,color='rgb(22, 96, 167)'))%>%
add_trace(y= ~arbol,
name='Árbol',
line=list(width=1,color="green"))%>%
add_trace(y= ~rf,
name='Bosque',
line=list(width=1,color='rgb(255, 51, 153)'))%>%
layout(title='Accidentes leves (Validación)',
xaxis=list(title="Fecha"),
yaxis=list(title="Accidentes"),
legend = list(x = 1, y = 0.9))
Entrenamiento<-round(c(RMSE_tr_lm_s_sd,RMSE_tr_knn_s_sd,RMSE_tr_glm_s_sd,RMSE_tr_tree_s_sd,RMSE_tr_rf_s_sd),3)
Validacion<-round(c(RMSE_test_lm_s_sd,RMSE_test_knn_s_sd,RMSE_test_glm_s_sd,RMSE_test_tree_s_sd,RMSE_test_rf_s_sd),3)
nombres<-c("lm","knn","glm","árbol","bosque")
ResultadosRMSE<-data.frame(Entrenamiento=Entrenamiento,Validacion=Validacion)
rownames(ResultadosRMSE)<-nombres
Cálculo de la variación
ResultadosRMSE$Variacion<-((ResultadosRMSE$Validacion-ResultadosRMSE$Entrenamiento)/ResultadosRMSE$Entrenamiento)*100
ResultadosRMSE
## Entrenamiento Validacion Variacion
## lm 25.765 33.319 29.31884
## knn 50.141 41.293 -17.64624
## glm 25.878 47.341 82.93918
## árbol 49.718 42.026 -15.47126
## bosque 42.009 37.313 -11.17856
Teniendo como criterio el mínimo RMSE en la muestra de validación se eligen los siguientes modelos:
Se ajusta el modelo con todos los datos desde el 01-01-2014 al 31-12-2018
library(caret)
trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_s_final = caret::train(TOTAL_ACCIDENTES∼Ano_Base+Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Total_Dataset_Freq_S,
method = "lm", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut
## = 10, : These variables have zero variances: SEMANA53
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient
## fit may be misleading
summary(caret_lm_fit_s_final)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -258.149 -30.832 2.753 32.259 161.851
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 799.336 3.242 246.566 < 2e-16 ***
## Ano_Base 1.399 3.250 0.431 0.667276
## SEMANA02 8.473 4.731 1.791 0.074808 .
## SEMANA03 26.585 4.653 5.713 3.88e-08 ***
## SEMANA04 25.603 4.669 5.483 1.23e-07 ***
## SEMANA05 29.168 4.669 6.247 2.40e-09 ***
## SEMANA06 33.773 4.669 7.233 9.34e-12 ***
## SEMANA07 38.736 4.669 8.296 1.45e-14 ***
## SEMANA08 34.267 4.669 7.339 5.01e-12 ***
## SEMANA09 33.061 4.669 7.081 2.27e-11 ***
## SEMANA10 38.352 4.669 8.214 2.43e-14 ***
## SEMANA11 37.831 4.669 8.102 4.88e-14 ***
## SEMANA12 33.825 4.739 7.137 1.64e-11 ***
## SEMANA13 34.379 4.713 7.294 6.53e-12 ***
## SEMANA14 38.750 4.724 8.202 2.62e-14 ***
## SEMANA15 36.913 4.724 7.813 2.89e-13 ***
## SEMANA16 31.073 4.724 6.577 3.95e-10 ***
## SEMANA17 39.069 4.589 8.513 3.67e-15 ***
## SEMANA18 37.867 4.576 8.276 1.65e-14 ***
## SEMANA19 37.442 4.653 8.047 6.88e-14 ***
## SEMANA20 36.290 4.653 7.799 3.15e-13 ***
## SEMANA21 36.674 4.653 7.882 1.90e-13 ***
## SEMANA22 31.487 4.658 6.760 1.42e-10 ***
## SEMANA23 36.087 4.684 7.704 5.64e-13 ***
## SEMANA24 34.475 4.658 7.401 3.47e-12 ***
## SEMANA25 29.156 4.658 6.259 2.24e-09 ***
## SEMANA26 21.754 4.658 4.670 5.45e-06 ***
## SEMANA27 28.454 4.799 5.929 1.28e-08 ***
## SEMANA28 32.430 4.669 6.946 4.95e-11 ***
## SEMANA29 38.727 4.658 8.314 1.30e-14 ***
## SEMANA30 37.140 4.653 7.982 1.03e-13 ***
## SEMANA31 40.645 5.069 8.019 8.20e-14 ***
## SEMANA32 35.309 5.372 6.572 4.06e-10 ***
## SEMANA33 38.593 4.653 8.294 1.47e-14 ***
## SEMANA34 32.791 4.731 6.931 5.39e-11 ***
## SEMANA35 36.625 4.669 7.844 2.40e-13 ***
## SEMANA36 35.473 4.669 7.597 1.07e-12 ***
## SEMANA37 39.613 4.669 8.484 4.42e-15 ***
## SEMANA38 39.860 4.669 8.537 3.16e-15 ***
## SEMANA39 34.240 4.669 7.333 5.19e-12 ***
## SEMANA40 39.805 4.669 8.525 3.41e-15 ***
## SEMANA41 27.660 4.669 5.924 1.32e-08 ***
## SEMANA42 32.704 4.799 6.815 1.04e-10 ***
## SEMANA43 33.965 4.669 7.274 7.34e-12 ***
## SEMANA44 36.378 4.669 7.791 3.32e-13 ***
## SEMANA45 27.961 4.799 5.827 2.18e-08 ***
## SEMANA46 35.045 4.684 7.481 2.15e-12 ***
## SEMANA47 33.516 4.658 7.195 1.17e-11 ***
## SEMANA48 30.538 4.669 6.540 4.84e-10 ***
## SEMANA49 37.653 4.584 8.215 2.42e-14 ***
## SEMANA50 37.062 4.576 8.100 4.94e-14 ***
## SEMANA51 40.797 4.589 8.890 3.27e-16 ***
## SEMANA52 21.504 4.653 4.621 6.75e-06 ***
## SEMANA53 -23.471 3.837 -6.118 4.77e-09 ***
## Feria_Flores_Semana 12.990 4.994 2.601 0.009979 **
## Semana_Santa_Semana -19.077 6.063 -3.146 0.001901 **
## Feriados_Lunes -17.462 4.685 -3.727 0.000251 ***
## Feriados_Otros -21.096 6.934 -3.042 0.002656 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 52.47 on 204 degrees of freedom
## Multiple R-squared: 0.7685, Adjusted R-squared: 0.7039
## F-statistic: 11.88 on 57 and 204 DF, p-value: < 2.2e-16
Se guardan el modelo en un objeto de r
saveRDS(caret_lm_fit_s_final,"../Modelos/Prediccion_Total_Semanal.rds")
Modelo_Total_semanal<-readRDS(file="../Modelos/Prediccion_Total_Semanal.rds")
Se ajusta el modelo de Random Forest con todos los datos desde el 01-01-2014 al 31-12-2018
trcntrl = trainControl(method="cv", number=10)
caret_rf_fit_s_m_final = caret::train(ACCIDENTES_GRAVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Total_Dataset_Freq_S,
method = "rf", trControl = trcntrl,
prox=TRUE,allowParallel=TRUE)
summary(caret_rf_fit_s_m_final)
## Length Class Mode
## call 6 -none- call
## type 1 -none- character
## predicted 262 -none- numeric
## mse 500 -none- numeric
## rsq 500 -none- numeric
## oob.times 262 -none- numeric
## importance 57 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 68644 -none- numeric
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 11 -none- list
## coefs 0 -none- NULL
## y 262 -none- numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 57 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 2 -none- list
Se guardan el modelo en un objeto de r
saveRDS(caret_rf_fit_s_m_final,"../Modelos/Prediccion_Grave_Semanal.rds")
Modelo_Grave_semanal<-readRDS(file="../Modelos/Prediccion_Grave_Semanal.rds")
Se ajusta el modelo con todos los datos desde el 01-01-2014 al 31-12-2018
trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_s_sd_final = caret::train(ACCIDENTES_LEVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Total_Dataset_Freq_S,
method = "lm", trControl = trcntrl,
preProcess=c("center", "scale"),
tuneLength = 10)
summary(caret_lm_fit_s_sd_final)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -116.080 -15.805 2.998 17.233 92.920
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 358.676 1.860 192.862 < 2e-16 ***
## Ano_Base 6.327 1.864 3.394 0.000828 ***
## SEMANA02 7.029 2.714 2.590 0.010303 *
## SEMANA03 14.377 2.669 5.386 1.98e-07 ***
## SEMANA04 15.775 2.679 5.889 1.58e-08 ***
## SEMANA05 17.036 2.679 6.360 1.30e-09 ***
## SEMANA06 17.639 2.679 6.585 3.77e-10 ***
## SEMANA07 22.903 2.679 8.551 2.90e-15 ***
## SEMANA08 18.845 2.679 7.036 2.95e-11 ***
## SEMANA09 18.763 2.679 7.005 3.52e-11 ***
## SEMANA10 20.024 2.679 7.476 2.22e-12 ***
## SEMANA11 19.257 2.679 7.189 1.21e-11 ***
## SEMANA12 18.640 2.719 6.856 8.25e-11 ***
## SEMANA13 17.872 2.704 6.610 3.29e-10 ***
## SEMANA14 21.710 2.710 8.011 8.60e-14 ***
## SEMANA15 21.464 2.710 7.920 1.51e-13 ***
## SEMANA16 17.077 2.710 6.301 1.79e-09 ***
## SEMANA17 22.775 2.633 8.651 1.53e-15 ***
## SEMANA18 21.640 2.625 8.244 2.01e-14 ***
## SEMANA19 21.943 2.669 8.221 2.33e-14 ***
## SEMANA20 21.943 2.669 8.221 2.33e-14 ***
## SEMANA21 20.298 2.669 7.605 1.03e-12 ***
## SEMANA22 17.091 2.672 6.396 1.07e-09 ***
## SEMANA23 18.982 2.687 7.064 2.50e-11 ***
## SEMANA24 18.681 2.672 6.991 3.81e-11 ***
## SEMANA25 16.460 2.672 6.160 3.82e-09 ***
## SEMANA26 13.664 2.672 5.113 7.27e-07 ***
## SEMANA27 15.418 2.753 5.601 6.85e-08 ***
## SEMANA28 18.489 2.679 6.903 6.33e-11 ***
## SEMANA29 21.758 2.672 8.143 3.79e-14 ***
## SEMANA30 20.381 2.669 7.635 8.54e-13 ***
## SEMANA31 23.027 2.908 7.919 1.52e-13 ***
## SEMANA32 21.415 3.082 6.949 4.86e-11 ***
## SEMANA33 21.285 2.669 7.974 1.08e-13 ***
## SEMANA34 17.803 2.714 6.559 4.36e-10 ***
## SEMANA35 18.133 2.679 6.770 1.35e-10 ***
## SEMANA36 19.832 2.679 7.404 3.41e-12 ***
## SEMANA37 21.560 2.679 8.049 6.79e-14 ***
## SEMANA38 22.656 2.679 8.458 5.20e-15 ***
## SEMANA39 18.983 2.679 7.087 2.19e-11 ***
## SEMANA40 23.259 2.679 8.684 1.24e-15 ***
## SEMANA41 18.078 2.679 6.749 1.51e-10 ***
## SEMANA42 18.818 2.753 6.836 9.26e-11 ***
## SEMANA43 20.326 2.679 7.588 1.13e-12 ***
## SEMANA44 21.806 2.679 8.141 3.83e-14 ***
## SEMANA45 17.776 2.753 6.457 7.66e-10 ***
## SEMANA46 23.917 2.687 8.900 3.05e-16 ***
## SEMANA47 20.984 2.672 7.853 2.28e-13 ***
## SEMANA48 19.531 2.679 7.292 6.63e-12 ***
## SEMANA49 25.424 2.629 9.669 < 2e-16 ***
## SEMANA50 24.694 2.625 9.408 < 2e-16 ***
## SEMANA51 25.873 2.633 9.828 < 2e-16 ***
## SEMANA52 12.327 2.669 4.618 6.85e-06 ***
## SEMANA53 -7.951 2.201 -3.613 0.000382 ***
## Feria_Flores_Semana 5.950 2.865 2.077 0.039095 *
## Semana_Santa_Semana -8.482 3.478 -2.439 0.015601 *
## Feriados_Lunes -9.792 2.688 -3.643 0.000342 ***
## Feriados_Otros -11.805 3.978 -2.968 0.003359 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30.1 on 204 degrees of freedom
## Multiple R-squared: 0.7417, Adjusted R-squared: 0.6695
## F-statistic: 10.27 on 57 and 204 DF, p-value: < 2.2e-16
Se guardan el modelo en un objeto de r
saveRDS(caret_lm_fit_s_sd_final,"../Modelos/Prediccion_leves_Semanal.rds")
Modelo_leves_semanal<-readRDS(file="../Modelos/Prediccion_leves_Semanal.rds")
Se oganizan los datos necesarios para el pronóstico de los accidentes en los años 2019, 2020 y 2021
Importación de los datos
load("../data/Dias_Especiales_Semanal.Rda")
datos_pronostico_semanal<-Dias_Especiales_Semanal[,c("ANO","Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")]
datos_pronostico_semanal$ANO1 <- datos_pronostico_semanal$ANO
datos_pronostico_semanal$SEMANA1 <- datos_pronostico_semanal$SEMANA
library(dplyr)
datos_pronostico_semanal<-unite_(datos_pronostico_semanal, "Ano_Sem", c("ANO1","SEMANA1"))
Predicción del Total de accidentes con el modelo de regresión lineal
datos_pronostico_semanal$prediccion_Total_s<-predict(Modelo_Total_semanal,datos_pronostico_semanal[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
Predicción de accidentes graves con el modelo de árbol de regresión
datos_pronostico_semanal$prediccion_Graves_s<-predict(Modelo_Grave_semanal,datos_pronostico_semanal[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
Predicción de accidentes leves con el modelo de regresión lineal
datos_pronostico_semanal$prediccion_Leves_s<-predict(Modelo_leves_semanal,datos_pronostico_semanal[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
Se guardan los datos de pronóstico en un objeto de r
save(datos_pronostico_semanal,file="../Modelos/datos_pronostico_semanal.Rda")